Cranelift AArch64: Fix the atomic memory operations (#4831)

Previously the implementations of the various atomic memory IR operations
ignored the memory operation flags that were passed.

Copyright (c) 2022, Arm Limited.

Co-authored-by: Chris Fallin <chris@cfallin.org>
This commit is contained in:
Anton Kirilov
2022-09-02 17:35:21 +01:00
committed by GitHub
parent d2e19b8d74
commit 48bf078c83
6 changed files with 194 additions and 99 deletions

View File

@@ -251,6 +251,7 @@
(AtomicRMWLoop
(ty Type) ;; I8, I16, I32 or I64
(op AtomicRMWLoopOp)
(flags MemFlags)
(addr Reg)
(operand Reg)
(oldval WritableReg)
@@ -268,6 +269,7 @@
;; x24 (wr) scratch reg; value afterwards has no meaning
(AtomicCASLoop
(ty Type) ;; I8, I16, I32 or I64
(flags MemFlags)
(addr Reg)
(expected Reg)
(replacement Reg)
@@ -282,7 +284,8 @@
(rs Reg)
(rt WritableReg)
(rn Reg)
(ty Type))
(ty Type)
(flags MemFlags))
;; An atomic compare-and-swap operation. These instructions require the
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
@@ -294,7 +297,8 @@
(rs Reg)
(rt Reg)
(rn Reg)
(ty Type))
(ty Type)
(flags MemFlags))
;; Read `access_ty` bits from address `rn`, either 8, 16, 32 or 64-bits, and put
;; it in `rt`, optionally zero-extending to fill a word or double word result.
@@ -302,14 +306,16 @@
(LoadAcquire
(access_ty Type) ;; I8, I16, I32 or I64
(rt WritableReg)
(rn Reg))
(rn Reg)
(flags MemFlags))
;; Write the lowest `access_ty` bits of `rt` to address `rn`.
;; This instruction is sequentially consistent.
(StoreRelease
(access_ty Type) ;; I8, I16, I32 or I64
(rt Reg)
(rn Reg))
(rn Reg)
(flags MemFlags))
;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb
@@ -2124,16 +2130,16 @@
dst))
;; Helper for emitting `MInst.LoadAcquire` instructions.
(decl load_acquire (Type Reg) Reg)
(rule (load_acquire ty addr)
(decl load_acquire (Type MemFlags Reg) Reg)
(rule (load_acquire ty flags addr)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.LoadAcquire ty dst addr))))
(_ Unit (emit (MInst.LoadAcquire ty dst addr flags))))
dst))
;; Helper for emitting `MInst.StoreRelease` instructions.
(decl store_release (Type Reg Reg) SideEffectNoResult)
(rule (store_release ty src addr)
(SideEffectNoResult.Inst (MInst.StoreRelease ty src addr)))
(decl store_release (Type MemFlags Reg Reg) SideEffectNoResult)
(rule (store_release ty flags src addr)
(SideEffectNoResult.Inst (MInst.StoreRelease ty src addr flags)))
;; Helper for generating a `tst` instruction.
;;
@@ -2694,21 +2700,10 @@
)
x))
;; An atomic load that can be sunk into another operation.
(type SinkableAtomicLoad extern (enum))
;; Extract a `SinkableAtomicLoad` that works with `Reg` from a value
;; operand.
(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
(extern extractor sinkable_atomic_load sinkable_atomic_load)
;; Sink a `SinkableAtomicLoad` into a `Reg`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
;; instruction, and no longer needs to be lowered.
(decl sink_atomic_load (SinkableAtomicLoad) Reg)
(extern constructor sink_atomic_load sink_atomic_load)
(decl sink_atomic_load (Inst) Reg)
(rule (sink_atomic_load x @ (atomic_load _ addr))
(let ((_ Unit (sink_inst x)))
(put_in_reg addr)))
;; Helper for generating either an `AluRRR`, `AluRRRShift`, or `AluRRImmLogic`
;; instruction depending on the input. Note that this requires that the `ALUOp`
@@ -2890,21 +2885,21 @@
(vec_misc (VecMisc2.Cmeq0) rn size))
;; Helper for emitting `MInst.AtomicRMW` instructions.
(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type) Reg)
(rule (lse_atomic_rmw op p r_arg2 ty)
(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type MemFlags) Reg)
(rule (lse_atomic_rmw op p r_arg2 ty flags)
(let (
(r_addr Reg p)
(dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty)))
(_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty flags)))
)
dst))
;; Helper for emitting `MInst.AtomicCAS` instructions.
(decl lse_atomic_cas (Reg Reg Reg Type) Reg)
(rule (lse_atomic_cas addr expect replace ty)
(decl lse_atomic_cas (Reg Reg Reg Type MemFlags) Reg)
(rule (lse_atomic_cas addr expect replace ty flags)
(let (
(dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty)))
(_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty flags)))
)
dst))
@@ -2914,12 +2909,12 @@
;; regs, and that's not guaranteed safe if either is in a real reg.
;; - Move the args to the preordained AtomicRMW input regs
;; - And finally, copy the preordained AtomicRMW output reg to its destination.
(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg)
(rule (atomic_rmw_loop op addr operand ty)
(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type MemFlags) Reg)
(rule (atomic_rmw_loop op addr operand ty flags)
(let ((dst WritableReg (temp_writable_reg $I64))
(scratch1 WritableReg (temp_writable_reg $I64))
(scratch2 WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2))))
(_ Unit (emit (MInst.AtomicRMWLoop ty op flags addr operand dst scratch1 scratch2))))
dst))
;; Helper for emitting `MInst.AtomicCASLoop` instructions.
@@ -2928,11 +2923,11 @@
;; about zero-extending narrow (I8/I16/I32) values here.
;; Make sure that all three args are in virtual regs. See corresponding comment
;; for `atomic_rmw_loop` above.
(decl atomic_cas_loop (Reg Reg Reg Type) Reg)
(rule (atomic_cas_loop addr expect replace ty)
(decl atomic_cas_loop (Reg Reg Reg Type MemFlags) Reg)
(rule (atomic_cas_loop addr expect replace ty flags)
(let ((dst WritableReg (temp_writable_reg $I64))
(scratch WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch))))
(_ Unit (emit (MInst.AtomicCASLoop ty flags addr expect replace dst scratch))))
dst))
;; Helper for emitting `MInst.MovPReg` instructions.

View File

@@ -1424,13 +1424,26 @@ impl MachInstEmit for Inst {
let rn = allocs.next(rn);
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
}
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
&Inst::AtomicRMW {
ty,
op,
rs,
rt,
rn,
flags,
} => {
let rs = allocs.next(rs);
let rt = allocs.next_writable(rt);
let rn = allocs.next(rn);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
}
&Inst::AtomicRMWLoop { ty, op, .. } => {
&Inst::AtomicRMWLoop { ty, op, flags, .. } => {
/* Emit this:
again:
ldaxr{,b,h} x/w27, [x25]
@@ -1463,10 +1476,12 @@ impl MachInstEmit for Inst {
// again:
sink.bind_label(again_label);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() {
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
let size = OperandSize::from_ty(ty);
let sign_ext = match op {
@@ -1588,7 +1603,7 @@ impl MachInstEmit for Inst {
}
let srcloc = state.cur_srcloc();
if !srcloc.is_default() {
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
if op == AtomicRMWLoopOp::Xchg {
@@ -1608,7 +1623,14 @@ impl MachInstEmit for Inst {
));
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
}
&Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
&Inst::AtomicCAS {
rd,
rs,
rt,
rn,
ty,
flags,
} => {
let rd = allocs.next_writable(rd);
let rs = allocs.next(rs);
debug_assert_eq!(rd.to_reg(), rs);
@@ -1622,9 +1644,14 @@ impl MachInstEmit for Inst {
_ => panic!("Unsupported type: {}", ty),
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_cas(size, rd, rt, rn));
}
&Inst::AtomicCASLoop { ty, .. } => {
&Inst::AtomicCASLoop { ty, flags, .. } => {
/* Emit this:
again:
ldaxr{,b,h} x/w27, [x25]
@@ -1651,10 +1678,12 @@ impl MachInstEmit for Inst {
// again:
sink.bind_label(again_label);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() {
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
// ldaxr x27, [x25]
sink.put4(enc_ldaxr(ty, x27wr, x25));
@@ -1679,9 +1708,10 @@ impl MachInstEmit for Inst {
sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() {
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
// cbnz w24, again.
@@ -1698,14 +1728,36 @@ impl MachInstEmit for Inst {
// out:
sink.bind_label(out_label);
}
&Inst::LoadAcquire { access_ty, rt, rn } => {
&Inst::LoadAcquire {
access_ty,
rt,
rn,
flags,
} => {
let rn = allocs.next(rn);
let rt = allocs.next_writable(rt);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_ldar(access_ty, rt, rn));
}
&Inst::StoreRelease { access_ty, rt, rn } => {
&Inst::StoreRelease {
access_ty,
rt,
rn,
flags,
} => {
let rn = allocs.next(rn);
let rt = allocs.next(rt);
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(enc_stlr(access_ty, rt, rn));
}
&Inst::Fence {} => {

View File

@@ -6926,6 +6926,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Sub,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -6939,6 +6940,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Eor,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -6952,6 +6954,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Add,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -6965,6 +6968,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I32,
op: AtomicRMWLoopOp::Orr,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -6978,6 +6982,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I64,
op: AtomicRMWLoopOp::And,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -6991,6 +6996,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Xchg,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7004,6 +7010,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Nand,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7017,6 +7024,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Smin,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7030,6 +7038,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I32,
op: AtomicRMWLoopOp::Smin,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7043,6 +7052,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I64,
op: AtomicRMWLoopOp::Smax,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7056,6 +7066,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Smax,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7069,6 +7080,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Umin,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7082,6 +7094,7 @@ fn test_aarch64_binemit() {
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Umax,
flags: MemFlags::trusted(),
addr: xreg(25),
operand: xreg(26),
oldval: writable_xreg(27),
@@ -7099,6 +7112,7 @@ fn test_aarch64_binemit() {
rs: xreg(1),
rt: writable_xreg(2),
rn: xreg(3),
flags: MemFlags::trusted(),
},
"6200E138",
"ldaddalb w1, w2, [x3]",
@@ -7110,6 +7124,7 @@ fn test_aarch64_binemit() {
rs: xreg(4),
rt: writable_xreg(5),
rn: xreg(6),
flags: MemFlags::trusted(),
},
"C500E478",
"ldaddalh w4, w5, [x6]",
@@ -7121,6 +7136,7 @@ fn test_aarch64_binemit() {
rs: xreg(7),
rt: writable_xreg(8),
rn: xreg(9),
flags: MemFlags::trusted(),
},
"2801E7B8",
"ldaddal w7, w8, [x9]",
@@ -7132,6 +7148,7 @@ fn test_aarch64_binemit() {
rs: xreg(10),
rt: writable_xreg(11),
rn: xreg(12),
flags: MemFlags::trusted(),
},
"8B01EAF8",
"ldaddal x10, x11, [x12]",
@@ -7143,6 +7160,7 @@ fn test_aarch64_binemit() {
rs: xreg(13),
rt: writable_xreg(14),
rn: xreg(15),
flags: MemFlags::trusted(),
},
"EE11ED38",
"ldclralb w13, w14, [x15]",
@@ -7154,6 +7172,7 @@ fn test_aarch64_binemit() {
rs: xreg(16),
rt: writable_xreg(17),
rn: xreg(18),
flags: MemFlags::trusted(),
},
"5112F078",
"ldclralh w16, w17, [x18]",
@@ -7165,6 +7184,7 @@ fn test_aarch64_binemit() {
rs: xreg(19),
rt: writable_xreg(20),
rn: xreg(21),
flags: MemFlags::trusted(),
},
"B412F3B8",
"ldclral w19, w20, [x21]",
@@ -7176,6 +7196,7 @@ fn test_aarch64_binemit() {
rs: xreg(22),
rt: writable_xreg(23),
rn: xreg(24),
flags: MemFlags::trusted(),
},
"1713F6F8",
"ldclral x22, x23, [x24]",
@@ -7187,6 +7208,7 @@ fn test_aarch64_binemit() {
rs: xreg(25),
rt: writable_xreg(26),
rn: xreg(27),
flags: MemFlags::trusted(),
},
"7A23F938",
"ldeoralb w25, w26, [x27]",
@@ -7198,6 +7220,7 @@ fn test_aarch64_binemit() {
rs: xreg(28),
rt: writable_xreg(29),
rn: xreg(30),
flags: MemFlags::trusted(),
},
"DD23FC78",
"ldeoralh w28, fp, [lr]",
@@ -7209,6 +7232,7 @@ fn test_aarch64_binemit() {
rs: xreg(29),
rt: writable_xreg(28),
rn: xreg(27),
flags: MemFlags::trusted(),
},
"7C23FDB8",
"ldeoral fp, w28, [x27]",
@@ -7220,6 +7244,7 @@ fn test_aarch64_binemit() {
rs: xreg(26),
rt: writable_xreg(25),
rn: xreg(24),
flags: MemFlags::trusted(),
},
"1923FAF8",
"ldeoral x26, x25, [x24]",
@@ -7231,6 +7256,7 @@ fn test_aarch64_binemit() {
rs: xreg(23),
rt: writable_xreg(22),
rn: xreg(21),
flags: MemFlags::trusted(),
},
"B632F738",
"ldsetalb w23, w22, [x21]",
@@ -7242,6 +7268,7 @@ fn test_aarch64_binemit() {
rs: xreg(20),
rt: writable_xreg(19),
rn: xreg(18),
flags: MemFlags::trusted(),
},
"5332F478",
"ldsetalh w20, w19, [x18]",
@@ -7253,6 +7280,7 @@ fn test_aarch64_binemit() {
rs: xreg(17),
rt: writable_xreg(16),
rn: xreg(15),
flags: MemFlags::trusted(),
},
"F031F1B8",
"ldsetal w17, w16, [x15]",
@@ -7264,6 +7292,7 @@ fn test_aarch64_binemit() {
rs: xreg(14),
rt: writable_xreg(13),
rn: xreg(12),
flags: MemFlags::trusted(),
},
"8D31EEF8",
"ldsetal x14, x13, [x12]",
@@ -7275,6 +7304,7 @@ fn test_aarch64_binemit() {
rs: xreg(11),
rt: writable_xreg(10),
rn: xreg(9),
flags: MemFlags::trusted(),
},
"2A41EB38",
"ldsmaxalb w11, w10, [x9]",
@@ -7286,6 +7316,7 @@ fn test_aarch64_binemit() {
rs: xreg(8),
rt: writable_xreg(7),
rn: xreg(6),
flags: MemFlags::trusted(),
},
"C740E878",
"ldsmaxalh w8, w7, [x6]",
@@ -7297,6 +7328,7 @@ fn test_aarch64_binemit() {
rs: xreg(5),
rt: writable_xreg(4),
rn: xreg(3),
flags: MemFlags::trusted(),
},
"6440E5B8",
"ldsmaxal w5, w4, [x3]",
@@ -7308,6 +7340,7 @@ fn test_aarch64_binemit() {
rs: xreg(2),
rt: writable_xreg(1),
rn: xreg(0),
flags: MemFlags::trusted(),
},
"0140E2F8",
"ldsmaxal x2, x1, [x0]",
@@ -7319,6 +7352,7 @@ fn test_aarch64_binemit() {
rs: xreg(1),
rt: writable_xreg(2),
rn: xreg(3),
flags: MemFlags::trusted(),
},
"6250E138",
"ldsminalb w1, w2, [x3]",
@@ -7330,6 +7364,7 @@ fn test_aarch64_binemit() {
rs: xreg(4),
rt: writable_xreg(5),
rn: xreg(6),
flags: MemFlags::trusted(),
},
"C550E478",
"ldsminalh w4, w5, [x6]",
@@ -7341,6 +7376,7 @@ fn test_aarch64_binemit() {
rs: xreg(7),
rt: writable_xreg(8),
rn: xreg(9),
flags: MemFlags::trusted(),
},
"2851E7B8",
"ldsminal w7, w8, [x9]",
@@ -7352,6 +7388,7 @@ fn test_aarch64_binemit() {
rs: xreg(10),
rt: writable_xreg(11),
rn: xreg(12),
flags: MemFlags::trusted(),
},
"8B51EAF8",
"ldsminal x10, x11, [x12]",
@@ -7363,6 +7400,7 @@ fn test_aarch64_binemit() {
rs: xreg(13),
rt: writable_xreg(14),
rn: xreg(15),
flags: MemFlags::trusted(),
},
"EE61ED38",
"ldumaxalb w13, w14, [x15]",
@@ -7374,6 +7412,7 @@ fn test_aarch64_binemit() {
rs: xreg(16),
rt: writable_xreg(17),
rn: xreg(18),
flags: MemFlags::trusted(),
},
"5162F078",
"ldumaxalh w16, w17, [x18]",
@@ -7385,6 +7424,7 @@ fn test_aarch64_binemit() {
rs: xreg(19),
rt: writable_xreg(20),
rn: xreg(21),
flags: MemFlags::trusted(),
},
"B462F3B8",
"ldumaxal w19, w20, [x21]",
@@ -7396,6 +7436,7 @@ fn test_aarch64_binemit() {
rs: xreg(22),
rt: writable_xreg(23),
rn: xreg(24),
flags: MemFlags::trusted(),
},
"1763F6F8",
"ldumaxal x22, x23, [x24]",
@@ -7407,6 +7448,7 @@ fn test_aarch64_binemit() {
rs: xreg(16),
rt: writable_xreg(17),
rn: xreg(18),
flags: MemFlags::trusted(),
},
"5172F038",
"lduminalb w16, w17, [x18]",
@@ -7418,6 +7460,7 @@ fn test_aarch64_binemit() {
rs: xreg(19),
rt: writable_xreg(20),
rn: xreg(21),
flags: MemFlags::trusted(),
},
"B472F378",
"lduminalh w19, w20, [x21]",
@@ -7429,6 +7472,7 @@ fn test_aarch64_binemit() {
rs: xreg(22),
rt: writable_xreg(23),
rn: xreg(24),
flags: MemFlags::trusted(),
},
"1773F6B8",
"lduminal w22, w23, [x24]",
@@ -7440,6 +7484,7 @@ fn test_aarch64_binemit() {
rs: xreg(25),
rt: writable_xreg(26),
rn: xreg(27),
flags: MemFlags::trusted(),
},
"7A73F9F8",
"lduminal x25, x26, [x27]",
@@ -7451,6 +7496,7 @@ fn test_aarch64_binemit() {
rs: xreg(28),
rt: writable_xreg(29),
rn: xreg(30),
flags: MemFlags::trusted(),
},
"DD83FC38",
"swpalb w28, fp, [lr]",
@@ -7462,6 +7508,7 @@ fn test_aarch64_binemit() {
rs: xreg(0),
rt: writable_xreg(1),
rn: xreg(2),
flags: MemFlags::trusted(),
},
"4180E078",
"swpalh w0, w1, [x2]",
@@ -7473,6 +7520,7 @@ fn test_aarch64_binemit() {
rs: xreg(3),
rt: writable_xreg(4),
rn: xreg(5),
flags: MemFlags::trusted(),
},
"A480E3B8",
"swpal w3, w4, [x5]",
@@ -7484,6 +7532,7 @@ fn test_aarch64_binemit() {
rs: xreg(6),
rt: writable_xreg(7),
rn: xreg(8),
flags: MemFlags::trusted(),
},
"0781E6F8",
"swpal x6, x7, [x8]",
@@ -7496,6 +7545,7 @@ fn test_aarch64_binemit() {
rt: xreg(20),
rn: xreg(10),
ty: I8,
flags: MemFlags::trusted(),
},
"54FDFC08",
"casalb w28, w28, w20, [x10]",
@@ -7507,6 +7557,7 @@ fn test_aarch64_binemit() {
rt: xreg(19),
rn: xreg(23),
ty: I16,
flags: MemFlags::trusted(),
},
"F3FEE248",
"casalh w2, w2, w19, [x23]",
@@ -7518,6 +7569,7 @@ fn test_aarch64_binemit() {
rt: zero_reg(),
rn: stack_reg(),
ty: I32,
flags: MemFlags::trusted(),
},
"FFFFE088",
"casal w0, w0, wzr, [sp]",
@@ -7529,6 +7581,7 @@ fn test_aarch64_binemit() {
rt: xreg(15),
rn: xreg(27),
ty: I64,
flags: MemFlags::trusted(),
},
"6FFFE7C8",
"casal x7, x7, x15, [x27]",
@@ -7536,6 +7589,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicCASLoop {
ty: I8,
flags: MemFlags::trusted(),
addr: xreg(25),
expected: xreg(26),
replacement: xreg(28),
@@ -7549,6 +7603,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicCASLoop {
ty: I16,
flags: MemFlags::trusted(),
addr: xreg(25),
expected: xreg(26),
replacement: xreg(28),
@@ -7562,6 +7617,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicCASLoop {
ty: I32,
flags: MemFlags::trusted(),
addr: xreg(25),
expected: xreg(26),
replacement: xreg(28),
@@ -7575,6 +7631,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicCASLoop {
ty: I64,
flags: MemFlags::trusted(),
addr: xreg(25),
expected: xreg(26),
replacement: xreg(28),
@@ -7590,6 +7647,7 @@ fn test_aarch64_binemit() {
access_ty: I8,
rt: writable_xreg(7),
rn: xreg(28),
flags: MemFlags::trusted(),
},
"87FFDF08",
"ldarb w7, [x28]",
@@ -7600,6 +7658,7 @@ fn test_aarch64_binemit() {
access_ty: I16,
rt: writable_xreg(2),
rn: xreg(3),
flags: MemFlags::trusted(),
},
"62FCDF48",
"ldarh w2, [x3]",
@@ -7610,6 +7669,7 @@ fn test_aarch64_binemit() {
access_ty: I32,
rt: writable_xreg(15),
rn: xreg(0),
flags: MemFlags::trusted(),
},
"0FFCDF88",
"ldar w15, [x0]",
@@ -7620,6 +7680,7 @@ fn test_aarch64_binemit() {
access_ty: I64,
rt: writable_xreg(28),
rn: xreg(7),
flags: MemFlags::trusted(),
},
"FCFCDFC8",
"ldar x28, [x7]",
@@ -7630,6 +7691,7 @@ fn test_aarch64_binemit() {
access_ty: I8,
rt: xreg(7),
rn: xreg(28),
flags: MemFlags::trusted(),
},
"87FF9F08",
"stlrb w7, [x28]",
@@ -7640,6 +7702,7 @@ fn test_aarch64_binemit() {
access_ty: I16,
rt: xreg(2),
rn: xreg(3),
flags: MemFlags::trusted(),
},
"62FC9F48",
"stlrh w2, [x3]",
@@ -7650,6 +7713,7 @@ fn test_aarch64_binemit() {
access_ty: I32,
rt: xreg(15),
rn: xreg(0),
flags: MemFlags::trusted(),
},
"0FFC9F88",
"stlr w15, [x0]",
@@ -7660,6 +7724,7 @@ fn test_aarch64_binemit() {
access_ty: I64,
rt: xreg(28),
rn: xreg(7),
flags: MemFlags::trusted(),
},
"FCFC9FC8",
"stlr x28, [x7]",

View File

@@ -1604,7 +1604,9 @@ impl Inst {
let cond = cond.pretty_print(0, allocs);
format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
}
&Inst::AtomicRMW { rs, rt, rn, ty, op } => {
&Inst::AtomicRMW {
rs, rt, rn, ty, op, ..
} => {
let op = match op {
AtomicRMWOp::Add => "ldaddal",
AtomicRMWOp::Clr => "ldclral",
@@ -1637,6 +1639,7 @@ impl Inst {
oldval,
scratch1,
scratch2,
..
} => {
let op = match op {
AtomicRMWLoopOp::Add => "add",
@@ -1667,7 +1670,9 @@ impl Inst {
scratch2,
)
}
&Inst::AtomicCAS { rd, rs, rt, rn, ty } => {
&Inst::AtomicCAS {
rd, rs, rt, rn, ty, ..
} => {
let op = match ty {
I8 => "casalb",
I16 => "casalh",
@@ -1689,6 +1694,7 @@ impl Inst {
replacement,
oldval,
scratch,
..
} => {
let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs);
let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs);

View File

@@ -926,8 +926,9 @@
;; Atomic loads will also automatically zero their upper bits so the `uextend`
;; instruction can effectively get skipped here.
(rule (lower (has_type (fits_in_64 out)
(uextend (and (value_type in) (sinkable_atomic_load addr)))))
(load_acquire in (sink_atomic_load addr)))
(uextend x @ (and (value_type in) (atomic_load flags _)))))
(if-let mem_op (is_sinkable_inst x))
(load_acquire in flags (sink_atomic_load mem_op)))
;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper
;; bits are all zero.
@@ -1780,98 +1781,98 @@
;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr)))
(load_acquire ty addr))
(load_acquire ty flags addr))
;;;; Rules for `AtomicStore` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (atomic_store flags
src @ (value_type (valid_atomic_transaction ty))
addr))
(side_effect (store_release ty src addr)))
(side_effect (store_release ty flags src addr)))
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Add) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Or) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty flags))
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.And) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Add) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Sub) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.And) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Nand) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Or) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xor) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smin) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smax) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umin) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umax) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty flags))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty flags))
;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_cas flags addr src1 src2))))
(lse_atomic_cas addr src1 src2 ty))
(lse_atomic_cas addr src1 src2 ty flags))
(rule (lower (and (has_type (valid_atomic_transaction ty)
(atomic_cas flags addr src1 src2))))
(atomic_cas_loop addr src1 src2 ty))
(atomic_cas_loop addr src1 src2 ty flags))
;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvdemote x))

View File

@@ -68,11 +68,6 @@ pub struct ExtendedValue {
extend: ExtendOp,
}
pub struct SinkableAtomicLoad {
atomic_load: Inst,
atomic_addr: Value,
}
impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
isle_prelude_method_helpers!(AArch64Caller);
}
@@ -366,25 +361,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}
}
fn sinkable_atomic_load(&mut self, val: Value) -> Option<SinkableAtomicLoad> {
let input = self.lower_ctx.get_value_as_source_or_const(val);
if let InputSourceInst::UniqueUse(atomic_load, 0) = input.inst {
if self.lower_ctx.data(atomic_load).opcode() == Opcode::AtomicLoad {
let atomic_addr = self.lower_ctx.input_as_value(atomic_load, 0);
return Some(SinkableAtomicLoad {
atomic_load,
atomic_addr,
});
}
}
None
}
fn sink_atomic_load(&mut self, load: &SinkableAtomicLoad) -> Reg {
self.lower_ctx.sink_inst(load.atomic_load);
self.put_in_reg(load.atomic_addr)
}
fn shift_mask(&mut self, ty: Type) -> ImmLogic {
debug_assert!(ty.lane_bits().is_power_of_two());