Cranelift AArch64: Add initial support for the Armv8.1 atomics

This commit enables Cranelift's AArch64 backend to generate code
for instruction set extensions (previously only the base Armv8-A
architecture was supported). It also makes it possible to detect
the extensions supported by the host when JIT compiling. The new
functionality is applied to the IR instruction `AtomicCas`, which
can now be lowered to a single Armv8.1 CAS instruction when the
Large System Extensions (LSE) are available.

Copyright (c) 2021, Arm Limited.
Author: Anton Kirilov
Date:   2021-03-02 18:35:40 +00:00
parent df6812b855
commit 07c27039b1
9 changed files with 204 additions and 53 deletions
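
The host-side feature detection mentioned in the commit message is not part
of the excerpt below, which only covers the lowering of `AtomicCas`. As a
rough illustration of what that detection amounts to, the sketch below uses
the standard library's `is_aarch64_feature_detected!` macro to query the host
CPU for LSE support; the function name `host_has_lse` and the idea of feeding
the result into the backend's `use_lse` flag are illustrative assumptions,
not code from this commit.

    // Illustrative sketch only (not from this commit): check whether the host
    // CPU implements the Armv8.1 Large System Extensions, e.g. before enabling
    // the backend's LSE flag when JIT compiling.
    #[cfg(target_arch = "aarch64")]
    fn host_has_lse() -> bool {
        std::arch::is_aarch64_feature_detected!("lse")
    }

    #[cfg(not(target_arch = "aarch64"))]
    fn host_has_lse() -> bool {
        false // runtime detection of AArch64 features only makes sense on an AArch64 host
    }

    fn main() {
        println!("Host supports LSE atomics: {}", host_has_lse());
    }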

@@ -5,6 +5,7 @@ use crate::ir::condcodes::FloatCC;
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::isa::aarch64::settings as aarch64_settings;
 use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::settings::Flags;
@@ -26,6 +27,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
     flags: &Flags,
+    isa_flags: &aarch64_settings::Flags,
 ) -> CodegenResult<()> {
     let op = ctx.data(insn).opcode();
     let inputs = insn_inputs(ctx, insn);
@@ -1183,37 +1185,48 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
         Opcode::AtomicCas => {
-            // This is very similar to, but not identical to, the AtomicRmw case. Note
-            // that the AtomicCAS sequence does its own masking, so we don't need to worry
-            // about zero-extending narrow (I8/I16/I32) values here.
             let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
             let ty_access = ty.unwrap();
             assert!(is_valid_atomic_transaction_ty(ty_access));
-            // Make sure that all three args are in virtual regs. See corresponding comment
-            // for `Opcode::AtomicRmw` above.
-            r_addr = ctx.ensure_in_vreg(r_addr, I64);
-            r_expected = ctx.ensure_in_vreg(r_expected, I64);
-            r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
-            // Move the args to the preordained AtomicCAS input regs
-            ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(26)),
-                r_expected,
-                I64,
-            ));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(28)),
-                r_replacement,
-                I64,
-            ));
-            // Now the AtomicCAS itself, implemented in the normal way, with an LL-SC loop
-            ctx.emit(Inst::AtomicCAS { ty: ty_access });
-            // And finally, copy the preordained AtomicCAS output reg to its destination.
-            ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
-            // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+
+            if isa_flags.use_lse() {
+                ctx.emit(Inst::gen_move(r_dst, r_expected, ty_access));
+                ctx.emit(Inst::AtomicCAS {
+                    rs: r_dst,
+                    rt: r_replacement,
+                    rn: r_addr,
+                    ty: ty_access,
+                });
+            } else {
+                // This is very similar to, but not identical to, the AtomicRmw case. Note
+                // that the AtomicCASLoop sequence does its own masking, so we don't need to worry
+                // about zero-extending narrow (I8/I16/I32) values here.
+                // Make sure that all three args are in virtual regs. See corresponding comment
+                // for `Opcode::AtomicRmw` above.
+                r_addr = ctx.ensure_in_vreg(r_addr, I64);
+                r_expected = ctx.ensure_in_vreg(r_expected, I64);
+                r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
+                // Move the args to the preordained AtomicCASLoop input regs
+                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(26)),
+                    r_expected,
+                    I64,
+                ));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(28)),
+                    r_replacement,
+                    I64,
+                ));
+                // Now the AtomicCASLoop itself, implemented in the normal way, with an LL-SC loop
+                ctx.emit(Inst::AtomicCASLoop { ty: ty_access });
+                // And finally, copy the preordained AtomicCASLoop output reg to its destination.
+                ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
+                // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+            }
         }
         Opcode::AtomicLoad => {
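
For reference, `AtomicCas` has ordinary compare-and-swap semantics: load the
value at the given address, compare it with the expected value, store the
replacement only if they match, and yield the loaded value either way. With
the Armv8.1 atomics the whole operation maps onto a single CAS instruction,
which is why the LSE path first copies `r_expected` into `r_dst`: the
architectural CAS uses one register both to supply the comparison value and
to receive the value read from memory. The fallback path instead expands into
a load-exclusive/store-exclusive loop via the `AtomicCASLoop` pseudo-
instruction. A minimal sketch of that contract in plain Rust (not Cranelift
code; the sequentially consistent ordering is chosen only for illustration):

    use std::sync::atomic::{AtomicU64, Ordering};

    // Compare-and-swap roughly as `AtomicCas` behaves at the IR level: return the
    // value that was actually loaded from memory, whether or not the swap happened.
    fn cas(cell: &AtomicU64, expected: u64, replacement: u64) -> u64 {
        match cell.compare_exchange(expected, replacement, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(previous) => previous,  // values matched; `replacement` was stored
            Err(previous) => previous, // no match; memory is unchanged
        }
    }

    fn main() {
        let cell = AtomicU64::new(1);
        assert_eq!(cas(&cell, 1, 2), 1); // swap succeeds
        assert_eq!(cas(&cell, 1, 3), 2); // expected 1 but found 2, so no swap
        assert_eq!(cell.load(Ordering::SeqCst), 2);
    }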