Cranelift AArch64: Add initial support for the Armv8.1 atomics
This commit enables Cranelift's AArch64 backend to generate code for instruction set extensions; previously, only the base Armv8-A architecture was supported. It also makes it possible to detect the extensions supported by the host when JIT-compiling. The new functionality is applied to the IR instruction `AtomicCas`: when the Armv8.1 Large System Extensions (LSE) are available, it lowers to a single `casal` instruction instead of a load-exclusive/store-exclusive loop.

Copyright (c) 2021, Arm Limited.
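A minimal usage sketch (not part of this commit; it assumes the `cranelift-codegen` ISA-builder and settings APIs of this era, including `finish` returning the ISA directly): explicitly enabling the new flag when constructing an AArch64 target looks roughly like this.

    use core::str::FromStr;
    use cranelift_codegen::isa;
    use cranelift_codegen::settings::{self, Configurable};
    use target_lexicon::Triple;

    fn build_isa_with_lse() -> Box<dyn isa::TargetIsa> {
        // Shared (architecture-independent) flags.
        let shared_flags = settings::Flags::new(settings::builder());
        // Look up the AArch64 backend and opt in to the Armv8.1 LSE atomics.
        let mut isa_builder = isa::lookup(Triple::from_str("aarch64").unwrap())
            .expect("unsupported triple");
        isa_builder.enable("has_lse").unwrap();
        isa_builder.finish(shared_flags)
    }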
@@ -8,7 +8,10 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
 use crate::shared::Definitions as SharedDefinitions;
 
 fn define_settings(_shared: &SettingGroup) -> SettingGroup {
-    let setting = SettingGroupBuilder::new("arm64");
+    let mut setting = SettingGroupBuilder::new("arm64");
+    let has_lse = setting.add_bool("has_lse", "Large System Extensions", false);
+
+    setting.add_predicate("use_lse", predicate!(has_lse));
+
     setting.build()
 }

@@ -462,6 +462,16 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
         | machreg_to_gpr(rt)
 }
 
+fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+    debug_assert_eq!(size & 0b11, size);
+
+    0b00_0010001_1_1_00000_1_11111_00000_00000
+        | size << 30
+        | machreg_to_gpr(rs.to_reg()) << 16
+        | machreg_to_gpr(rn) << 5
+        | machreg_to_gpr(rt)
+}
+
 fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
     let abc = (imm >> 5) as u32;
     let defgh = (imm & 0b11111) as u32;

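As a quick sanity check of the formula in `enc_cas` (this snippet is illustrative and not part of the diff), encoding `casal x7, x15, [x27]` by hand reproduces the expected bytes from the binemit test added below:

    fn main() {
        // size = 0b11 (I64), rs = x7, rn = x27, rt = x15
        let word: u32 = 0b00_0010001_1_1_00000_1_11111_00000_00000
            | (0b11 << 30) // size
            | (7 << 16)    // rs
            | (27 << 5)    // rn
            | 15;          // rt
        assert_eq!(word, 0xC8E7_FF6F); // little-endian bytes: 6F FF E7 C8 = "6FFFE7C8"
    }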
@@ -1164,7 +1174,18 @@ impl MachInstEmit for Inst
             sink.put4(enc_dmb_ish()); // dmb ish
         }
-        &Inst::AtomicCAS { ty } => {
+        &Inst::AtomicCAS { rs, rt, rn, ty } => {
+            let size = match ty {
+                I8 => 0b00,
+                I16 => 0b01,
+                I32 => 0b10,
+                I64 => 0b11,
+                _ => panic!("Unsupported type: {}", ty),
+            };
+
+            sink.put4(enc_cas(size, rs, rt, rn));
+        }
+        &Inst::AtomicCASLoop { ty } => {
             /* Emit this:
                dmb ish
              again:

@@ -5235,9 +5235,48 @@ fn test_aarch64_binemit() {
         "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
         "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
     ));
 
+    insns.push((
+        Inst::AtomicCAS {
+            rs: writable_xreg(28),
+            rt: xreg(20),
+            rn: xreg(10),
+            ty: I8,
+        },
+        "54FDFC08",
+        "casalb w28, w20, [x10]",
+    ));
+    insns.push((
+        Inst::AtomicCAS {
+            rs: writable_xreg(2),
+            rt: xreg(19),
+            rn: xreg(23),
+            ty: I16,
+        },
+        "F3FEE248",
+        "casalh w2, w19, [x23]",
+    ));
+    insns.push((
+        Inst::AtomicCAS {
+            rs: writable_xreg(0),
+            rt: zero_reg(),
+            rn: stack_reg(),
+            ty: I32,
+        },
+        "FFFFE088",
+        "casal w0, wzr, [sp]",
+    ));
+    insns.push((
+        Inst::AtomicCAS {
+            rs: writable_xreg(7),
+            rt: xreg(15),
+            rn: xreg(27),
+            ty: I64,
+        },
+        "6FFFE7C8",
+        "casal x7, x15, [x27]",
+    ));
     insns.push((
-        Inst::AtomicCAS {
+        Inst::AtomicCASLoop {
             ty: I8,
         },
         "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",

@@ -5245,7 +5284,7 @@ fn test_aarch64_binemit() {
     ));
 
     insns.push((
-        Inst::AtomicCAS {
+        Inst::AtomicCASLoop {
             ty: I64,
         },
         "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",

@@ -696,19 +696,26 @@ pub enum Inst {
         op: inst_common::AtomicRmwOp,
     },
 
+    /// An atomic compare-and-swap operation. This instruction is sequentially consistent.
+    AtomicCAS {
+        rs: Writable<Reg>,
+        rt: Reg,
+        rn: Reg,
+        ty: Type,
+    },
+
     /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
-    /// store-conditional loop. (Although we could possibly implement it more directly using
-    /// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
-    /// both preceded and followed by a fence which is at least as comprehensive as that of the
-    /// `Fence` instruction below. This instruction is sequentially consistent. Note that the
-    /// operand conventions, although very similar to AtomicRMW, are different:
+    /// store-conditional loop. The sequence is both preceded and followed by a fence which is
+    /// at least as comprehensive as that of the `Fence` instruction below. This instruction
+    /// is sequentially consistent. Note that the operand conventions, although very similar
+    /// to AtomicRMW, are different:
     ///
     /// x25 (rd) address
     /// x26 (rd) expected value
     /// x28 (rd) replacement value
     /// x27 (wr) old value
     /// x24 (wr) scratch reg; value afterwards has no meaning
-    AtomicCAS {
+    AtomicCASLoop {
         ty: Type, // I8, I16, I32 or I64
     },

@@ -1755,7 +1762,12 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(writable_xreg(27));
             collector.add_def(writable_xreg(28));
         }
-        &Inst::AtomicCAS { .. } => {
+        &Inst::AtomicCAS { rs, rt, rn, .. } => {
+            collector.add_mod(rs);
+            collector.add_use(rt);
+            collector.add_use(rn);
+        }
+        &Inst::AtomicCASLoop { .. } => {
             collector.add_use(xreg(25));
             collector.add_use(xreg(26));
             collector.add_use(xreg(28));

@@ -2330,7 +2342,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
         &mut Inst::AtomicRMW { .. } => {
             // There are no vregs to map in this insn.
         }
-        &mut Inst::AtomicCAS { .. } => {
+        &mut Inst::AtomicCAS {
+            ref mut rs,
+            ref mut rt,
+            ref mut rn,
+            ..
+        } => {
+            map_mod(mapper, rs);
+            map_use(mapper, rt);
+            map_use(mapper, rn);
+        }
+        &mut Inst::AtomicCASLoop { .. } => {
             // There are no vregs to map in this insn.
         }
         &mut Inst::AtomicLoad {

@@ -3302,7 +3324,21 @@ impl Inst {
                 "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
                 ty.bits(), op)
             }
-            &Inst::AtomicCAS { ty, .. } => {
+            &Inst::AtomicCAS { rs, rt, rn, ty } => {
+                let op = match ty {
+                    I8 => "casalb",
+                    I16 => "casalh",
+                    I32 | I64 => "casal",
+                    _ => panic!("Unsupported type: {}", ty),
+                };
+                let size = OperandSize::from_ty(ty);
+                let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
+                let rt = show_ireg_sized(rt, mb_rru, size);
+                let rn = rn.show_rru(mb_rru);
+
+                format!("{} {}, {}, [{}]", op, rs, rt, rn)
+            }
+            &Inst::AtomicCASLoop { ty } => {
                 format!(
                     "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
                     ty.bits())

@@ -1231,7 +1231,7 @@ impl LowerBackend for AArch64Backend {
     type MInst = Inst;
 
     fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
-        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags)
+        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
     }
 
     fn lower_branch_group<C: LowerCtx<I = Inst>>(

@@ -5,6 +5,7 @@ use crate::ir::condcodes::FloatCC;
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::isa::aarch64::settings as aarch64_settings;
 use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::settings::Flags;

@@ -26,6 +27,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
     flags: &Flags,
+    isa_flags: &aarch64_settings::Flags,
 ) -> CodegenResult<()> {
     let op = ctx.data(insn).opcode();
     let inputs = insn_inputs(ctx, insn);

@@ -1183,37 +1185,48 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::AtomicCas => {
-            // This is very similar to, but not identical to, the AtomicRmw case. Note
-            // that the AtomicCAS sequence does its own masking, so we don't need to worry
-            // about zero-extending narrow (I8/I16/I32) values here.
             let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
             let ty_access = ty.unwrap();
             assert!(is_valid_atomic_transaction_ty(ty_access));
-            // Make sure that all three args are in virtual regs. See corresponding comment
-            // for `Opcode::AtomicRmw` above.
-            r_addr = ctx.ensure_in_vreg(r_addr, I64);
-            r_expected = ctx.ensure_in_vreg(r_expected, I64);
-            r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
-            // Move the args to the preordained AtomicCAS input regs
-            ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(26)),
-                r_expected,
-                I64,
-            ));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(28)),
-                r_replacement,
-                I64,
-            ));
-            // Now the AtomicCAS itself, implemented in the normal way, with an LL-SC loop
-            ctx.emit(Inst::AtomicCAS { ty: ty_access });
-            // And finally, copy the preordained AtomicCAS output reg to its destination.
-            ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
-            // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+
+            if isa_flags.use_lse() {
+                ctx.emit(Inst::gen_move(r_dst, r_expected, ty_access));
+                ctx.emit(Inst::AtomicCAS {
+                    rs: r_dst,
+                    rt: r_replacement,
+                    rn: r_addr,
+                    ty: ty_access,
+                });
+            } else {
+                // This is very similar to, but not identical to, the AtomicRmw case. Note
+                // that the AtomicCASLoop sequence does its own masking, so we don't need to worry
+                // about zero-extending narrow (I8/I16/I32) values here.
+                // Make sure that all three args are in virtual regs. See corresponding comment
+                // for `Opcode::AtomicRmw` above.
+                r_addr = ctx.ensure_in_vreg(r_addr, I64);
+                r_expected = ctx.ensure_in_vreg(r_expected, I64);
+                r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
+                // Move the args to the preordained AtomicCASLoop input regs
+                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(26)),
+                    r_expected,
+                    I64,
+                ));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(28)),
+                    r_replacement,
+                    I64,
+                ));
+                // Now the AtomicCASLoop itself, implemented in the normal way, with an LL-SC loop
+                ctx.emit(Inst::AtomicCASLoop { ty: ty_access });
+                // And finally, copy the preordained AtomicCASLoop output reg to its destination.
+                ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
+                // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+            }
         }
 
         Opcode::AtomicLoad => {

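For reference, a behavioural model of what the emitted `casal rs, rt, [rn]` computes (a sketch based on the architecture's documented compare-and-swap semantics, not code from this commit). The LSE path above relies on `rs` carrying the expected value in and the old memory value out, which is why `r_dst` is preloaded with `r_expected`:

    // Pseudo-model of casal (compare-and-swap with acquire/release ordering).
    fn casal_model(mem: &mut u64, rs: &mut u64, rt: u64) {
        let old = *mem;      // load-acquire of the memory location
        if old == *rs {
            *mem = rt;       // store-release of the replacement value
        }
        *rs = old;           // rs always receives the old memory value
    }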
@@ -2,10 +2,11 @@
 use crate::ir::condcodes::IntCC;
 use crate::ir::Function;
+use crate::isa::aarch64::settings as aarch64_settings;
 use crate::isa::Builder as IsaBuilder;
 use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
 use crate::result::CodegenResult;
-use crate::settings;
+use crate::settings as shared_settings;
 
 use alloc::boxed::Box;
 use core::hash::{Hash, Hasher};

@@ -18,6 +19,7 @@ mod abi;
 pub(crate) mod inst;
 mod lower;
 mod lower_inst;
+mod settings;
 
 use inst::create_reg_universe;

@@ -26,17 +28,23 @@ use self::inst::EmitInfo;
 /// An AArch64 backend.
 pub struct AArch64Backend {
     triple: Triple,
-    flags: settings::Flags,
+    flags: shared_settings::Flags,
+    isa_flags: aarch64_settings::Flags,
     reg_universe: RealRegUniverse,
 }
 
 impl AArch64Backend {
     /// Create a new AArch64 backend with the given (shared) flags.
-    pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
+    pub fn new_with_flags(
+        triple: Triple,
+        flags: shared_settings::Flags,
+        isa_flags: aarch64_settings::Flags,
+    ) -> AArch64Backend {
         let reg_universe = create_reg_universe(&flags);
         AArch64Backend {
             triple,
             flags,
+            isa_flags,
             reg_universe,
         }
     }

@@ -46,7 +54,7 @@ impl AArch64Backend {
     fn compile_vcode(
         &self,
         func: &Function,
-        flags: settings::Flags,
+        flags: shared_settings::Flags,
     ) -> CodegenResult<VCode<inst::Inst>> {
         let emit_info = EmitInfo::new(flags.clone());
         let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);

@@ -92,12 +100,13 @@ impl MachBackend for AArch64Backend {
         self.triple.clone()
     }
 
-    fn flags(&self) -> &settings::Flags {
+    fn flags(&self) -> &shared_settings::Flags {
         &self.flags
     }
 
     fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
         self.flags.hash(&mut hasher);
+        self.isa_flags.hash(&mut hasher);
     }
 
     fn reg_universe(&self) -> &RealRegUniverse {

@@ -155,9 +164,10 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
     assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
     IsaBuilder {
         triple,
-        setup: settings::builder(),
-        constructor: |triple, shared_flags, _| {
-            let backend = AArch64Backend::new_with_flags(triple, shared_flags);
+        setup: aarch64_settings::builder(),
+        constructor: |triple, shared_flags, builder| {
+            let isa_flags = aarch64_settings::Flags::new(&shared_flags, builder);
+            let backend = AArch64Backend::new_with_flags(triple, shared_flags, isa_flags);
             Box::new(TargetIsaAdapter::new(backend))
         },
     }

@@ -192,11 +202,14 @@ mod test {
         let v1 = pos.ins().iadd(arg0, v0);
         pos.ins().return_(&[v1]);
 
-        let mut shared_flags = settings::builder();
-        shared_flags.set("opt_level", "none").unwrap();
+        let mut shared_flags_builder = settings::builder();
+        shared_flags_builder.set("opt_level", "none").unwrap();
+        let shared_flags = settings::Flags::new(shared_flags_builder);
+        let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
         let backend = AArch64Backend::new_with_flags(
             Triple::from_str("aarch64").unwrap(),
-            settings::Flags::new(shared_flags),
+            shared_flags,
+            isa_flags,
         );
         let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
         let code = &buffer.data[..];

@@ -246,11 +259,14 @@ mod test {
         let v3 = pos.ins().isub(v1, v0);
         pos.ins().return_(&[v3]);
 
-        let mut shared_flags = settings::builder();
-        shared_flags.set("opt_level", "none").unwrap();
+        let mut shared_flags_builder = settings::builder();
+        shared_flags_builder.set("opt_level", "none").unwrap();
+        let shared_flags = settings::Flags::new(shared_flags_builder);
+        let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
         let backend = AArch64Backend::new_with_flags(
             Triple::from_str("aarch64").unwrap(),
-            settings::Flags::new(shared_flags),
+            shared_flags,
+            isa_flags,
         );
         let result = backend
             .compile_function(&mut func, /* want_disasm = */ false)

cranelift/codegen/src/isa/aarch64/settings.rs (new file, +9)
@@ -0,0 +1,9 @@
+//! AArch64 Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/arm64/settings.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));

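An illustrative sketch of how the generated ISA flags are consumed (the accessor and constructor names are taken from calls made elsewhere in this commit; the `Flags` struct itself is produced by the meta crate at build time):

    // `shared_flags` are the architecture-independent settings::Flags.
    let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
    if isa_flags.use_lse() {
        // Armv8.1 LSE enabled: AtomicCas lowers to a single casal.
    } else {
        // Fall back to the load-exclusive/store-exclusive loop.
    }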
@@ -105,6 +105,20 @@ pub fn builder_with_options(
         }
     }
 
+    // `stdsimd` is necessary for std::is_aarch64_feature_detected!().
+    #[cfg(all(target_arch = "aarch64", feature = "stdsimd"))]
+    {
+        use cranelift_codegen::settings::Configurable;
+
+        if !infer_native_flags {
+            return Ok(isa_builder);
+        }
+
+        if std::is_aarch64_feature_detected!("lse") {
+            isa_builder.enable("has_lse").unwrap();
+        }
+    }
+
     // squelch warnings about unused mut/variables on some platforms.
     drop(&mut isa_builder);
     drop(infer_native_flags);

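A hypothetical JIT-side usage sketch (not part of the diff; it assumes `cranelift_native::builder()` and the `finish` signature of this era): letting `cranelift-native` probe the host means `has_lse` is enabled automatically on Armv8.1+ hardware.

    use cranelift_codegen::settings;

    // Infers host features, including "lse" via std::is_aarch64_feature_detected!.
    let isa_builder = cranelift_native::builder().expect("host ISA not supported");
    let isa = isa_builder.finish(settings::Flags::new(settings::builder()));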