Cranelift AArch64: Add initial support for the Armv8.1 atomics

This commit enables Cranelift's AArch64 backend to generate code
for instruction set extensions (previously only the base Armv8-A
architecture was supported); also, it makes it possible to detect
the extensions supported by the host when JIT compiling. The new
functionality is applied to the IR instruction `AtomicCas`.

Copyright (c) 2021, Arm Limited.
This commit is contained in:
Anton Kirilov
2021-03-02 18:35:40 +00:00
parent df6812b855
commit 07c27039b1
9 changed files with 204 additions and 53 deletions

View File

@@ -462,6 +462,16 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
| machreg_to_gpr(rt)
}
/// Builds the 32-bit machine word for a compare-and-swap instruction (the
/// form that is pretty-printed as `casalb` / `casalh` / `casal`).
///
/// * `size` - two-bit access-size selector, inserted at bits 30..=31. The
///   emitter passes `0b00`, `0b01`, `0b10` and `0b11` for `I8`, `I16`, `I32`
///   and `I64` respectively.
/// * `rs` - register inserted at bits 16..=20; register allocation treats it
///   as both read and written by the instruction.
/// * `rt` - register inserted at bits 0..=4.
/// * `rn` - register inserted at bits 5..=9; printed as the `[rn]` memory
///   operand.
///
/// In debug builds this asserts that `size` fits in two bits.
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    // The size selector is only two bits wide; anything larger is a caller bug.
    debug_assert_eq!(size & 0b11, size);

    // Fixed opcode bits with every variable field left as zero.
    let template: u32 = 0b00_0010001_1_1_00000_1_11111_00000_00000;

    // Position each variable field, evaluating the registers in the order
    // rs, rn, rt.
    let size_field = size << 30;
    let rs_field = machreg_to_gpr(rs.to_reg()) << 16;
    let rn_field = machreg_to_gpr(rn) << 5;
    let rt_field = machreg_to_gpr(rt);

    template | size_field | rs_field | rn_field | rt_field
}
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
let abc = (imm >> 5) as u32;
let defgh = (imm & 0b11111) as u32;
@@ -1164,7 +1174,18 @@ impl MachInstEmit for Inst {
sink.put4(enc_dmb_ish()); // dmb ish
}
&Inst::AtomicCAS { ty } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let size = match ty {
I8 => 0b00,
I16 => 0b01,
I32 => 0b10,
I64 => 0b11,
_ => panic!("Unsupported type: {}", ty),
};
sink.put4(enc_cas(size, rs, rt, rn));
}
&Inst::AtomicCASLoop { ty } => {
/* Emit this:
dmb ish
again:

View File

@@ -5235,9 +5235,48 @@ fn test_aarch64_binemit() {
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(28),
rt: xreg(20),
rn: xreg(10),
ty: I8,
},
"54FDFC08",
"casalb w28, w20, [x10]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(2),
rt: xreg(19),
rn: xreg(23),
ty: I16,
},
"F3FEE248",
"casalh w2, w19, [x23]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(0),
rt: zero_reg(),
rn: stack_reg(),
ty: I32,
},
"FFFFE088",
"casal w0, wzr, [sp]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(7),
rt: xreg(15),
rn: xreg(27),
ty: I64,
},
"6FFFE7C8",
"casal x7, x15, [x27]",
));
insns.push((
Inst::AtomicCASLoop {
ty: I8,
},
"BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
@@ -5245,7 +5284,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AtomicCAS {
Inst::AtomicCASLoop {
ty: I64,
},
"BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",

View File

@@ -696,19 +696,26 @@ pub enum Inst {
op: inst_common::AtomicRmwOp,
},
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
AtomicCAS {
rs: Writable<Reg>,
rt: Reg,
rn: Reg,
ty: Type,
},
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
/// store-conditional loop. (Although we could possibly implement it more directly using
/// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
/// both preceded and followed by a fence which is at least as comprehensive as that of the
/// `Fence` instruction below. This instruction is sequentially consistent. Note that the
/// operand conventions, although very similar to AtomicRMW, are different:
/// store-conditional loop. The sequence is both preceded and followed by a fence which is
/// at least as comprehensive as that of the `Fence` instruction below. This instruction
/// is sequentially consistent. Note that the operand conventions, although very similar
/// to AtomicRMW, are different:
///
/// x25 (rd) address
/// x26 (rd) expected value
/// x28 (rd) replacement value
/// x27 (wr) old value
/// x24 (wr) scratch reg; value afterwards has no meaning
AtomicCAS {
AtomicCASLoop {
ty: Type, // I8, I16, I32 or I64
},
@@ -1755,7 +1762,12 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(writable_xreg(27));
collector.add_def(writable_xreg(28));
}
&Inst::AtomicCAS { .. } => {
&Inst::AtomicCAS { rs, rt, rn, .. } => {
collector.add_mod(rs);
collector.add_use(rt);
collector.add_use(rn);
}
&Inst::AtomicCASLoop { .. } => {
collector.add_use(xreg(25));
collector.add_use(xreg(26));
collector.add_use(xreg(28));
@@ -2330,7 +2342,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
&mut Inst::AtomicRMW { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicCAS { .. } => {
&mut Inst::AtomicCAS {
ref mut rs,
ref mut rt,
ref mut rn,
..
} => {
map_mod(mapper, rs);
map_use(mapper, rt);
map_use(mapper, rn);
}
&mut Inst::AtomicCASLoop { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicLoad {
@@ -3302,7 +3324,21 @@ impl Inst {
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
ty.bits(), op)
}
&Inst::AtomicCAS { ty, .. } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let op = match ty {
I8 => "casalb",
I16 => "casalh",
I32 | I64 => "casal",
_ => panic!("Unsupported type: {}", ty),
};
let size = OperandSize::from_ty(ty);
let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
let rt = show_ireg_sized(rt, mb_rru, size);
let rn = rn.show_rru(mb_rru);
format!("{} {}, {}, [{}]", op, rs, rt, rn)
}
&Inst::AtomicCASLoop { ty } => {
format!(
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
ty.bits())