[AArch64] Port AtomicCAS to isle (#4140)
Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -244,7 +244,9 @@
|
|||||||
(rn Reg)
|
(rn Reg)
|
||||||
(ty Type))
|
(ty Type))
|
||||||
|
|
||||||
;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
;; An atomic compare-and-swap operation. These instructions require the
|
||||||
|
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||||
|
;; acquire-release semantics.
|
||||||
(AtomicCAS
|
(AtomicCAS
|
||||||
(rs WritableReg)
|
(rs WritableReg)
|
||||||
(rt Reg)
|
(rt Reg)
|
||||||
@@ -2129,6 +2131,16 @@
|
|||||||
)
|
)
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.AtomicCAS` instructions.
|
||||||
|
(decl lse_atomic_cas (Reg Reg Reg Type) Reg)
|
||||||
|
(rule (lse_atomic_cas addr expect replace ty)
|
||||||
|
(let (
|
||||||
|
(dst WritableReg (temp_writable_reg ty))
|
||||||
|
(_1 Unit (emit (MInst.Mov (operand_size ty) dst expect)))
|
||||||
|
(_2 Unit (emit (MInst.AtomicCAS dst replace addr ty)))
|
||||||
|
)
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.AtomicRMWLoop` instructions.
|
;; Helper for emitting `MInst.AtomicRMWLoop` instructions.
|
||||||
;; - Make sure that both args are in virtual regs, since in effect
|
;; - Make sure that both args are in virtual regs, since in effect
|
||||||
;; we have to do a parallel copy to get them safely to the AtomicRMW input
|
;; we have to do a parallel copy to get them safely to the AtomicRMW input
|
||||||
@@ -2145,3 +2157,27 @@
|
|||||||
(_ Unit (emit (MInst.AtomicRMWLoop ty op)))
|
(_ Unit (emit (MInst.AtomicRMWLoop ty op)))
|
||||||
)
|
)
|
||||||
(mov64_from_real 27)))
|
(mov64_from_real 27)))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.AtomicCASLoop` instructions.
|
||||||
|
;; This is very similar to, but not identical to, the AtomicRmw case. Note
|
||||||
|
;; that the AtomicCASLoop sequence does its own masking, so we don't need to worry
|
||||||
|
;; about zero-extending narrow (I8/I16/I32) values here.
|
||||||
|
;; Make sure that all three args are in virtual regs. See corresponding comment
|
||||||
|
;; for `atomic_rmw_loop` above.
|
||||||
|
(decl atomic_cas_loop (Reg Reg Reg Type) Reg)
|
||||||
|
(rule (atomic_cas_loop addr expect replace ty)
|
||||||
|
(let (
|
||||||
|
(v_addr Reg (ensure_in_vreg addr $I64))
|
||||||
|
(v_exp Reg (ensure_in_vreg expect $I64))
|
||||||
|
(v_rep Reg (ensure_in_vreg replace $I64))
|
||||||
|
;; Move the args to the preordained AtomicCASLoop input regs
|
||||||
|
(r_addr Reg (mov64_to_real 25 v_addr))
|
||||||
|
(r_exp Reg (mov64_to_real 26 v_exp))
|
||||||
|
(r_rep Reg (mov64_to_real 28 v_rep))
|
||||||
|
;; Now the AtomicCASLoop itself, implemented in the normal way, with a
|
||||||
|
;; load-exclusive, store-exclusive loop
|
||||||
|
(_ Unit (emit (MInst.AtomicCASLoop ty)))
|
||||||
|
)
|
||||||
|
;; And finally, copy the preordained AtomicCASLoop output reg to its destination.
|
||||||
|
;; Also, x24 and x28 are trashed.
|
||||||
|
(mov64_from_real 27)))
|
||||||
|
|||||||
@@ -1225,39 +1225,39 @@
|
|||||||
|
|
||||||
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Add) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Add) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Or) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Or) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
|
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
|
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
|
||||||
(rule (lower (and (use_lse)
|
(rule 1 (lower (and (use_lse)
|
||||||
(has_type (valid_atomic_transaction ty)
|
(has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.And) addr src))))
|
(atomic_rmw flags (AtomicRmwOp.And) addr src))))
|
||||||
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
|
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
|
||||||
@@ -1296,3 +1296,13 @@
|
|||||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||||
(atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
|
(atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
|
||||||
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
|
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
|
||||||
|
|
||||||
|
;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
(rule 1 (lower (and (use_lse)
|
||||||
|
(has_type (valid_atomic_transaction ty)
|
||||||
|
(atomic_cas flags addr src1 src2))))
|
||||||
|
(lse_atomic_cas addr src1 src2 ty))
|
||||||
|
|
||||||
|
(rule (lower (and (has_type (valid_atomic_transaction ty)
|
||||||
|
(atomic_cas flags addr src1 src2))))
|
||||||
|
(atomic_cas_loop addr src1 src2 ty))
|
||||||
|
|||||||
@@ -239,50 +239,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::AtomicRmw => implemented_in_isle(ctx),
|
Opcode::AtomicRmw => implemented_in_isle(ctx),
|
||||||
|
|
||||||
Opcode::AtomicCas => {
|
Opcode::AtomicCas => implemented_in_isle(ctx),
|
||||||
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
|
|
||||||
let ty_access = ty.unwrap();
|
|
||||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
|
||||||
|
|
||||||
if isa_flags.use_lse() {
|
|
||||||
ctx.emit(Inst::gen_move(r_dst, r_expected, ty_access));
|
|
||||||
ctx.emit(Inst::AtomicCAS {
|
|
||||||
rs: r_dst,
|
|
||||||
rt: r_replacement,
|
|
||||||
rn: r_addr,
|
|
||||||
ty: ty_access,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// This is very similar to, but not identical to, the AtomicRmw case. Note
|
|
||||||
// that the AtomicCASLoop sequence does its own masking, so we don't need to worry
|
|
||||||
// about zero-extending narrow (I8/I16/I32) values here.
|
|
||||||
// Make sure that all three args are in virtual regs. See corresponding comment
|
|
||||||
// for `Opcode::AtomicRmw` above.
|
|
||||||
r_addr = ctx.ensure_in_vreg(r_addr, I64);
|
|
||||||
r_expected = ctx.ensure_in_vreg(r_expected, I64);
|
|
||||||
r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
|
|
||||||
// Move the args to the preordained AtomicCASLoop input regs
|
|
||||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
|
||||||
ctx.emit(Inst::gen_move(
|
|
||||||
Writable::from_reg(xreg(26)),
|
|
||||||
r_expected,
|
|
||||||
I64,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::gen_move(
|
|
||||||
Writable::from_reg(xreg(28)),
|
|
||||||
r_replacement,
|
|
||||||
I64,
|
|
||||||
));
|
|
||||||
// Now the AtomicCASLoop itself, implemented in the normal way, with an LL-SC loop
|
|
||||||
ctx.emit(Inst::AtomicCASLoop { ty: ty_access });
|
|
||||||
// And finally, copy the preordained AtomicCASLoop output reg to its destination.
|
|
||||||
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
|
||||||
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::AtomicLoad => {
|
Opcode::AtomicLoad => {
|
||||||
let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user