Cranelift AArch64: Add initial support for the Armv8.1 atomics
This commit enables Cranelift's AArch64 backend to generate code for instruction set extensions (previously only the base Armv8-A architecture was supported), and it makes it possible to detect, when JIT compiling, which extensions the host supports. The new functionality is applied to the IR instruction `AtomicCas`. Copyright (c) 2021, Arm Limited.
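Background, for readers of the diff below: Armv8.1's Large System Extensions (LSE) add single-instruction atomics such as CAS, so a compare-and-swap no longer has to be lowered to a load-exclusive/store-exclusive retry loop. The following snippet is purely illustrative and not part of the commit; it shows the operation being lowered. Compiled for aarch64 with the lse target feature enabled, LLVM typically turns `compare_exchange` into a single CASAL instruction, and without it into an LDAXR/STLXR loop -- the same choice this commit teaches Cranelift to make via the new `use_lse` flag.

use std::sync::atomic::{AtomicU64, Ordering};

// Semantics of the IR instruction `AtomicCas`: compare the value in memory
// with `expected` and, if they match, store `replacement`; in both cases the
// value originally found in memory is returned.
fn atomic_cas(word: &AtomicU64, expected: u64, replacement: u64) -> u64 {
    match word.compare_exchange(expected, replacement, Ordering::SeqCst, Ordering::SeqCst) {
        // Success and failure both carry the previous value, which is the
        // only result `AtomicCas` produces.
        Ok(previous) | Err(previous) => previous,
    }
}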
@@ -5,6 +5,7 @@ use crate::ir::condcodes::FloatCC;
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::isa::aarch64::settings as aarch64_settings;
 use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::settings::Flags;
@@ -26,6 +27,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
     flags: &Flags,
+    isa_flags: &aarch64_settings::Flags,
 ) -> CodegenResult<()> {
     let op = ctx.data(insn).opcode();
     let inputs = insn_inputs(ctx, insn);
@@ -1183,37 +1185,48 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::AtomicCas => {
-            // This is very similar to, but not identical to, the AtomicRmw case. Note
-            // that the AtomicCAS sequence does its own masking, so we don't need to worry
-            // about zero-extending narrow (I8/I16/I32) values here.
             let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
             let ty_access = ty.unwrap();
             assert!(is_valid_atomic_transaction_ty(ty_access));
-            // Make sure that all three args are in virtual regs. See corresponding comment
-            // for `Opcode::AtomicRmw` above.
-            r_addr = ctx.ensure_in_vreg(r_addr, I64);
-            r_expected = ctx.ensure_in_vreg(r_expected, I64);
-            r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
-            // Move the args to the preordained AtomicCAS input regs
-            ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(26)),
-                r_expected,
-                I64,
-            ));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(xreg(28)),
-                r_replacement,
-                I64,
-            ));
-            // Now the AtomicCAS itself, implemented in the normal way, with an LL-SC loop
-            ctx.emit(Inst::AtomicCAS { ty: ty_access });
-            // And finally, copy the preordained AtomicCAS output reg to its destination.
-            ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
-            // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+
+            if isa_flags.use_lse() {
+                ctx.emit(Inst::gen_move(r_dst, r_expected, ty_access));
+                ctx.emit(Inst::AtomicCAS {
+                    rs: r_dst,
+                    rt: r_replacement,
+                    rn: r_addr,
+                    ty: ty_access,
+                });
+            } else {
+                // This is very similar to, but not identical to, the AtomicRmw case. Note
+                // that the AtomicCASLoop sequence does its own masking, so we don't need to worry
+                // about zero-extending narrow (I8/I16/I32) values here.
+                // Make sure that all three args are in virtual regs. See corresponding comment
+                // for `Opcode::AtomicRmw` above.
+                r_addr = ctx.ensure_in_vreg(r_addr, I64);
+                r_expected = ctx.ensure_in_vreg(r_expected, I64);
+                r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
+                // Move the args to the preordained AtomicCASLoop input regs
+                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(26)),
+                    r_expected,
+                    I64,
+                ));
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(xreg(28)),
+                    r_replacement,
+                    I64,
+                ));
+                // Now the AtomicCASLoop itself, implemented in the normal way, with an LL-SC loop
+                ctx.emit(Inst::AtomicCASLoop { ty: ty_access });
+                // And finally, copy the preordained AtomicCASLoop output reg to its destination.
+                ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
+                // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+            }
         }
 
         Opcode::AtomicLoad => {
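Why the LSE path first copies `r_expected` into `r_dst`: the underlying instruction, CAS(AL) Xs, Xt, [Xn], compares Xs against memory at Xn, stores Xt on a match, and unconditionally writes the value it read back into Xs. Passing `r_dst` as `rs` therefore leaves the loaded value exactly where the IR result wants it. Below is a minimal sketch of the shape of the `Inst::AtomicCAS` variant constructed above -- the field types and stub definitions are assumptions for illustration, not part of this diff:

// Stub types standing in for Cranelift's machinst/regalloc infrastructure,
// so that this sketch stands alone.
struct Reg(u8);
struct Writable<T>(T);
enum Type { I8, I16, I32, I64 }

enum Inst {
    // Armv8.1 compare-and-swap, i.e. CAS{,A,L,AL} Xs, Xt, [Xn].
    AtomicCAS {
        rs: Writable<Reg>, // Xs: comparison value in, loaded value out
        rt: Reg,           // Xt: replacement value
        rn: Reg,           // Xn: address of the memory location
        ty: Type,          // access width, I8 through I64
    },
    // ... the backend's other instructions elided ...
}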