From 48bf078c83fa413c75ccb94c577489e1d3e23023 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Fri, 2 Sep 2022 17:35:21 +0100 Subject: [PATCH] Cranelift AArch64: Fix the atomic memory operations (#4831) Previously the implementations of the various atomic memory IR operations ignored the memory operation flags that were passed. Copyright (c) 2022, Arm Limited. Co-authored-by: Chris Fallin --- cranelift/codegen/src/isa/aarch64/inst.isle | 69 +++++++++--------- .../codegen/src/isa/aarch64/inst/emit.rs | 72 ++++++++++++++++--- .../src/isa/aarch64/inst/emit_tests.rs | 65 +++++++++++++++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 10 ++- cranelift/codegen/src/isa/aarch64/lower.isle | 53 +++++++------- .../codegen/src/isa/aarch64/lower/isle.rs | 24 ------- 6 files changed, 194 insertions(+), 99 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index b097b0e84d..7a9e9c0173 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -251,6 +251,7 @@ (AtomicRMWLoop (ty Type) ;; I8, I16, I32 or I64 (op AtomicRMWLoopOp) + (flags MemFlags) (addr Reg) (operand Reg) (oldval WritableReg) @@ -268,6 +269,7 @@ ;; x24 (wr) scratch reg; value afterwards has no meaning (AtomicCASLoop (ty Type) ;; I8, I16, I32 or I64 + (flags MemFlags) (addr Reg) (expected Reg) (replacement Reg) @@ -282,7 +284,8 @@ (rs Reg) (rt WritableReg) (rn Reg) - (ty Type)) + (ty Type) + (flags MemFlags)) ;; An atomic compare-and-swap operation. These instructions require the ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have @@ -294,7 +297,8 @@ (rs Reg) (rt Reg) (rn Reg) - (ty Type)) + (ty Type) + (flags MemFlags)) ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put ;; it in `rn`, optionally zero-extending to fill a word or double word result. 
@@ -302,14 +306,16 @@ (LoadAcquire (access_ty Type) ;; I8, I16, I32 or I64 (rt WritableReg) - (rn Reg)) + (rn Reg) + (flags MemFlags)) ;; Write the lowest `ty` bits of `rt` to address `rn`. ;; This instruction is sequentially consistent. (StoreRelease (access_ty Type) ;; I8, I16, I32 or I64 (rt Reg) - (rn Reg)) + (rn Reg) + (flags MemFlags)) ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb @@ -2124,16 +2130,16 @@ dst)) ;; Helper for emitting `MInst.LoadAcquire` instructions. -(decl load_acquire (Type Reg) Reg) -(rule (load_acquire ty addr) +(decl load_acquire (Type MemFlags Reg) Reg) +(rule (load_acquire ty flags addr) (let ((dst WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.LoadAcquire ty dst addr)))) + (_ Unit (emit (MInst.LoadAcquire ty dst addr flags)))) dst)) ;; Helper for emitting `MInst.StoreRelease` instructions. -(decl store_release (Type Reg Reg) SideEffectNoResult) -(rule (store_release ty src addr) - (SideEffectNoResult.Inst (MInst.StoreRelease ty src addr))) +(decl store_release (Type MemFlags Reg Reg) SideEffectNoResult) +(rule (store_release ty flags src addr) + (SideEffectNoResult.Inst (MInst.StoreRelease ty src addr flags))) ;; Helper for generating a `tst` instruction. ;; @@ -2694,21 +2700,10 @@ ) x)) -;; An atomic load that can be sunk into another operation. -(type SinkableAtomicLoad extern (enum)) - -;; Extract a `SinkableAtomicLoad` that works with `Reg` from a value -;; operand. -(decl sinkable_atomic_load (SinkableAtomicLoad) Value) -(extern extractor sinkable_atomic_load sinkable_atomic_load) - -;; Sink a `SinkableAtomicLoad` into a `Reg`. -;; -;; This is a side-effectful operation that notifies the context that the -;; instruction that produced the `SinkableAtomicLoad` has been sunk into another -;; instruction, and no longer needs to be lowered. 
-(decl sink_atomic_load (SinkableAtomicLoad) Reg) -(extern constructor sink_atomic_load sink_atomic_load) +(decl sink_atomic_load (Inst) Reg) +(rule (sink_atomic_load x @ (atomic_load _ addr)) + (let ((_ Unit (sink_inst x))) + (put_in_reg addr))) ;; Helper for generating either an `AluRRR`, `AluRRRShift`, or `AluRRImmLogic` ;; instruction depending on the input. Note that this requires that the `ALUOp` @@ -2890,21 +2885,21 @@ (vec_misc (VecMisc2.Cmeq0) rn size)) ;; Helper for emitting `MInst.AtomicRMW` instructions. -(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type) Reg) -(rule (lse_atomic_rmw op p r_arg2 ty) +(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type MemFlags) Reg) +(rule (lse_atomic_rmw op p r_arg2 ty flags) (let ( (r_addr Reg p) (dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty))) + (_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty flags))) ) dst)) ;; Helper for emitting `MInst.AtomicCAS` instructions. -(decl lse_atomic_cas (Reg Reg Reg Type) Reg) -(rule (lse_atomic_cas addr expect replace ty) +(decl lse_atomic_cas (Reg Reg Reg Type MemFlags) Reg) +(rule (lse_atomic_cas addr expect replace ty flags) (let ( (dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty))) + (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty flags))) ) dst)) @@ -2914,12 +2909,12 @@ ;; regs, and that's not guaranteed safe if either is in a real reg. ;; - Move the args to the preordained AtomicRMW input regs ;; - And finally, copy the preordained AtomicRMW output reg to its destination. 
-(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type) Reg) -(rule (atomic_rmw_loop op addr operand ty) +(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type MemFlags) Reg) +(rule (atomic_rmw_loop op addr operand ty flags) (let ((dst WritableReg (temp_writable_reg $I64)) (scratch1 WritableReg (temp_writable_reg $I64)) (scratch2 WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.AtomicRMWLoop ty op addr operand dst scratch1 scratch2)))) + (_ Unit (emit (MInst.AtomicRMWLoop ty op flags addr operand dst scratch1 scratch2)))) dst)) ;; Helper for emitting `MInst.AtomicCASLoop` instructions. @@ -2928,11 +2923,11 @@ ;; about zero-extending narrow (I8/I16/I32) values here. ;; Make sure that all three args are in virtual regs. See corresponding comment ;; for `atomic_rmw_loop` above. -(decl atomic_cas_loop (Reg Reg Reg Type) Reg) -(rule (atomic_cas_loop addr expect replace ty) +(decl atomic_cas_loop (Reg Reg Reg Type MemFlags) Reg) +(rule (atomic_cas_loop addr expect replace ty flags) (let ((dst WritableReg (temp_writable_reg $I64)) (scratch WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.AtomicCASLoop ty addr expect replace dst scratch)))) + (_ Unit (emit (MInst.AtomicCASLoop ty flags addr expect replace dst scratch)))) dst)) ;; Helper for emitting `MInst.MovPReg` instructions. 
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 891e1b0bb6..8ba60b59ad 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1424,13 +1424,26 @@ impl MachInstEmit for Inst { let rn = allocs.next(rn); sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } - &Inst::AtomicRMW { ty, op, rs, rt, rn } => { + &Inst::AtomicRMW { + ty, + op, + rs, + rt, + rn, + flags, + } => { let rs = allocs.next(rs); let rt = allocs.next_writable(rt); let rn = allocs.next(rn); + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put4(enc_acq_rel(ty, op, rs, rt, rn)); } - &Inst::AtomicRMWLoop { ty, op, .. } => { + &Inst::AtomicRMWLoop { ty, op, flags, .. } => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1463,10 +1476,12 @@ impl MachInstEmit for Inst { // again: sink.bind_label(again_label); + let srcloc = state.cur_srcloc(); - if !srcloc.is_default() { + if !srcloc.is_default() && !flags.notrap() { sink.add_trap(TrapCode::HeapOutOfBounds); } + sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25] let size = OperandSize::from_ty(ty); let sign_ext = match op { @@ -1588,7 +1603,7 @@ impl MachInstEmit for Inst { } let srcloc = state.cur_srcloc(); - if !srcloc.is_default() { + if !srcloc.is_default() && !flags.notrap() { sink.add_trap(TrapCode::HeapOutOfBounds); } if op == AtomicRMWLoopOp::Xchg { @@ -1608,7 +1623,14 @@ impl MachInstEmit for Inst { )); sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); } - &Inst::AtomicCAS { rd, rs, rt, rn, ty } => { + &Inst::AtomicCAS { + rd, + rs, + rt, + rn, + ty, + flags, + } => { let rd = allocs.next_writable(rd); let rs = allocs.next(rs); debug_assert_eq!(rd.to_reg(), rs); @@ -1622,9 +1644,14 @@ impl MachInstEmit for Inst { _ => panic!("Unsupported type: {}", ty), }; + let srcloc = state.cur_srcloc(); + if 
!srcloc.is_default() && !flags.notrap() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put4(enc_cas(size, rd, rt, rn)); } - &Inst::AtomicCASLoop { ty, .. } => { + &Inst::AtomicCASLoop { ty, flags, .. } => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1651,10 +1678,12 @@ impl MachInstEmit for Inst { // again: sink.bind_label(again_label); + let srcloc = state.cur_srcloc(); - if !srcloc.is_default() { + if !srcloc.is_default() && !flags.notrap() { sink.add_trap(TrapCode::HeapOutOfBounds); } + // ldaxr x27, [x25] sink.put4(enc_ldaxr(ty, x27wr, x25)); @@ -1679,9 +1708,10 @@ impl MachInstEmit for Inst { sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); let srcloc = state.cur_srcloc(); - if !srcloc.is_default() { + if !srcloc.is_default() && !flags.notrap() { sink.add_trap(TrapCode::HeapOutOfBounds); } + sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] // cbnz w24, again. @@ -1698,14 +1728,36 @@ impl MachInstEmit for Inst { // out: sink.bind_label(out_label); } - &Inst::LoadAcquire { access_ty, rt, rn } => { + &Inst::LoadAcquire { + access_ty, + rt, + rn, + flags, + } => { let rn = allocs.next(rn); let rt = allocs.next_writable(rt); + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put4(enc_ldar(access_ty, rt, rn)); } - &Inst::StoreRelease { access_ty, rt, rn } => { + &Inst::StoreRelease { + access_ty, + rt, + rn, + flags, + } => { let rn = allocs.next(rn); let rt = allocs.next(rt); + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put4(enc_stlr(access_ty, rt, rn)); } &Inst::Fence {} => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 627c0bd48a..55e1a8f82f 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ 
b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -6926,6 +6926,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Sub, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -6939,6 +6940,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Eor, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -6952,6 +6954,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Add, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -6965,6 +6968,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I32, op: AtomicRMWLoopOp::Orr, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -6978,6 +6982,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I64, op: AtomicRMWLoopOp::And, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -6991,6 +6996,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Xchg, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7004,6 +7010,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Nand, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7017,6 +7024,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Smin, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7030,6 +7038,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I32, op: AtomicRMWLoopOp::Smin, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7043,6 +7052,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I64, op: AtomicRMWLoopOp::Smax, + flags: MemFlags::trusted(), addr: 
xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7056,6 +7066,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Smax, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7069,6 +7080,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I8, op: AtomicRMWLoopOp::Umin, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7082,6 +7094,7 @@ fn test_aarch64_binemit() { Inst::AtomicRMWLoop { ty: I16, op: AtomicRMWLoopOp::Umax, + flags: MemFlags::trusted(), addr: xreg(25), operand: xreg(26), oldval: writable_xreg(27), @@ -7099,6 +7112,7 @@ fn test_aarch64_binemit() { rs: xreg(1), rt: writable_xreg(2), rn: xreg(3), + flags: MemFlags::trusted(), }, "6200E138", "ldaddalb w1, w2, [x3]", @@ -7110,6 +7124,7 @@ fn test_aarch64_binemit() { rs: xreg(4), rt: writable_xreg(5), rn: xreg(6), + flags: MemFlags::trusted(), }, "C500E478", "ldaddalh w4, w5, [x6]", @@ -7121,6 +7136,7 @@ fn test_aarch64_binemit() { rs: xreg(7), rt: writable_xreg(8), rn: xreg(9), + flags: MemFlags::trusted(), }, "2801E7B8", "ldaddal w7, w8, [x9]", @@ -7132,6 +7148,7 @@ fn test_aarch64_binemit() { rs: xreg(10), rt: writable_xreg(11), rn: xreg(12), + flags: MemFlags::trusted(), }, "8B01EAF8", "ldaddal x10, x11, [x12]", @@ -7143,6 +7160,7 @@ fn test_aarch64_binemit() { rs: xreg(13), rt: writable_xreg(14), rn: xreg(15), + flags: MemFlags::trusted(), }, "EE11ED38", "ldclralb w13, w14, [x15]", @@ -7154,6 +7172,7 @@ fn test_aarch64_binemit() { rs: xreg(16), rt: writable_xreg(17), rn: xreg(18), + flags: MemFlags::trusted(), }, "5112F078", "ldclralh w16, w17, [x18]", @@ -7165,6 +7184,7 @@ fn test_aarch64_binemit() { rs: xreg(19), rt: writable_xreg(20), rn: xreg(21), + flags: MemFlags::trusted(), }, "B412F3B8", "ldclral w19, w20, [x21]", @@ -7176,6 +7196,7 @@ fn test_aarch64_binemit() { rs: xreg(22), rt: writable_xreg(23), rn: xreg(24), + flags: MemFlags::trusted(), }, "1713F6F8", 
"ldclral x22, x23, [x24]", @@ -7187,6 +7208,7 @@ fn test_aarch64_binemit() { rs: xreg(25), rt: writable_xreg(26), rn: xreg(27), + flags: MemFlags::trusted(), }, "7A23F938", "ldeoralb w25, w26, [x27]", @@ -7198,6 +7220,7 @@ fn test_aarch64_binemit() { rs: xreg(28), rt: writable_xreg(29), rn: xreg(30), + flags: MemFlags::trusted(), }, "DD23FC78", "ldeoralh w28, fp, [lr]", @@ -7209,6 +7232,7 @@ fn test_aarch64_binemit() { rs: xreg(29), rt: writable_xreg(28), rn: xreg(27), + flags: MemFlags::trusted(), }, "7C23FDB8", "ldeoral fp, w28, [x27]", @@ -7220,6 +7244,7 @@ fn test_aarch64_binemit() { rs: xreg(26), rt: writable_xreg(25), rn: xreg(24), + flags: MemFlags::trusted(), }, "1923FAF8", "ldeoral x26, x25, [x24]", @@ -7231,6 +7256,7 @@ fn test_aarch64_binemit() { rs: xreg(23), rt: writable_xreg(22), rn: xreg(21), + flags: MemFlags::trusted(), }, "B632F738", "ldsetalb w23, w22, [x21]", @@ -7242,6 +7268,7 @@ fn test_aarch64_binemit() { rs: xreg(20), rt: writable_xreg(19), rn: xreg(18), + flags: MemFlags::trusted(), }, "5332F478", "ldsetalh w20, w19, [x18]", @@ -7253,6 +7280,7 @@ fn test_aarch64_binemit() { rs: xreg(17), rt: writable_xreg(16), rn: xreg(15), + flags: MemFlags::trusted(), }, "F031F1B8", "ldsetal w17, w16, [x15]", @@ -7264,6 +7292,7 @@ fn test_aarch64_binemit() { rs: xreg(14), rt: writable_xreg(13), rn: xreg(12), + flags: MemFlags::trusted(), }, "8D31EEF8", "ldsetal x14, x13, [x12]", @@ -7275,6 +7304,7 @@ fn test_aarch64_binemit() { rs: xreg(11), rt: writable_xreg(10), rn: xreg(9), + flags: MemFlags::trusted(), }, "2A41EB38", "ldsmaxalb w11, w10, [x9]", @@ -7286,6 +7316,7 @@ fn test_aarch64_binemit() { rs: xreg(8), rt: writable_xreg(7), rn: xreg(6), + flags: MemFlags::trusted(), }, "C740E878", "ldsmaxalh w8, w7, [x6]", @@ -7297,6 +7328,7 @@ fn test_aarch64_binemit() { rs: xreg(5), rt: writable_xreg(4), rn: xreg(3), + flags: MemFlags::trusted(), }, "6440E5B8", "ldsmaxal w5, w4, [x3]", @@ -7308,6 +7340,7 @@ fn test_aarch64_binemit() { rs: xreg(2), rt: 
writable_xreg(1), rn: xreg(0), + flags: MemFlags::trusted(), }, "0140E2F8", "ldsmaxal x2, x1, [x0]", @@ -7319,6 +7352,7 @@ fn test_aarch64_binemit() { rs: xreg(1), rt: writable_xreg(2), rn: xreg(3), + flags: MemFlags::trusted(), }, "6250E138", "ldsminalb w1, w2, [x3]", @@ -7330,6 +7364,7 @@ fn test_aarch64_binemit() { rs: xreg(4), rt: writable_xreg(5), rn: xreg(6), + flags: MemFlags::trusted(), }, "C550E478", "ldsminalh w4, w5, [x6]", @@ -7341,6 +7376,7 @@ fn test_aarch64_binemit() { rs: xreg(7), rt: writable_xreg(8), rn: xreg(9), + flags: MemFlags::trusted(), }, "2851E7B8", "ldsminal w7, w8, [x9]", @@ -7352,6 +7388,7 @@ fn test_aarch64_binemit() { rs: xreg(10), rt: writable_xreg(11), rn: xreg(12), + flags: MemFlags::trusted(), }, "8B51EAF8", "ldsminal x10, x11, [x12]", @@ -7363,6 +7400,7 @@ fn test_aarch64_binemit() { rs: xreg(13), rt: writable_xreg(14), rn: xreg(15), + flags: MemFlags::trusted(), }, "EE61ED38", "ldumaxalb w13, w14, [x15]", @@ -7374,6 +7412,7 @@ fn test_aarch64_binemit() { rs: xreg(16), rt: writable_xreg(17), rn: xreg(18), + flags: MemFlags::trusted(), }, "5162F078", "ldumaxalh w16, w17, [x18]", @@ -7385,6 +7424,7 @@ fn test_aarch64_binemit() { rs: xreg(19), rt: writable_xreg(20), rn: xreg(21), + flags: MemFlags::trusted(), }, "B462F3B8", "ldumaxal w19, w20, [x21]", @@ -7396,6 +7436,7 @@ fn test_aarch64_binemit() { rs: xreg(22), rt: writable_xreg(23), rn: xreg(24), + flags: MemFlags::trusted(), }, "1763F6F8", "ldumaxal x22, x23, [x24]", @@ -7407,6 +7448,7 @@ fn test_aarch64_binemit() { rs: xreg(16), rt: writable_xreg(17), rn: xreg(18), + flags: MemFlags::trusted(), }, "5172F038", "lduminalb w16, w17, [x18]", @@ -7418,6 +7460,7 @@ fn test_aarch64_binemit() { rs: xreg(19), rt: writable_xreg(20), rn: xreg(21), + flags: MemFlags::trusted(), }, "B472F378", "lduminalh w19, w20, [x21]", @@ -7429,6 +7472,7 @@ fn test_aarch64_binemit() { rs: xreg(22), rt: writable_xreg(23), rn: xreg(24), + flags: MemFlags::trusted(), }, "1773F6B8", "lduminal w22, w23, 
[x24]", @@ -7440,6 +7484,7 @@ fn test_aarch64_binemit() { rs: xreg(25), rt: writable_xreg(26), rn: xreg(27), + flags: MemFlags::trusted(), }, "7A73F9F8", "lduminal x25, x26, [x27]", @@ -7451,6 +7496,7 @@ fn test_aarch64_binemit() { rs: xreg(28), rt: writable_xreg(29), rn: xreg(30), + flags: MemFlags::trusted(), }, "DD83FC38", "swpalb w28, fp, [lr]", @@ -7462,6 +7508,7 @@ fn test_aarch64_binemit() { rs: xreg(0), rt: writable_xreg(1), rn: xreg(2), + flags: MemFlags::trusted(), }, "4180E078", "swpalh w0, w1, [x2]", @@ -7473,6 +7520,7 @@ fn test_aarch64_binemit() { rs: xreg(3), rt: writable_xreg(4), rn: xreg(5), + flags: MemFlags::trusted(), }, "A480E3B8", "swpal w3, w4, [x5]", @@ -7484,6 +7532,7 @@ fn test_aarch64_binemit() { rs: xreg(6), rt: writable_xreg(7), rn: xreg(8), + flags: MemFlags::trusted(), }, "0781E6F8", "swpal x6, x7, [x8]", @@ -7496,6 +7545,7 @@ fn test_aarch64_binemit() { rt: xreg(20), rn: xreg(10), ty: I8, + flags: MemFlags::trusted(), }, "54FDFC08", "casalb w28, w28, w20, [x10]", @@ -7507,6 +7557,7 @@ fn test_aarch64_binemit() { rt: xreg(19), rn: xreg(23), ty: I16, + flags: MemFlags::trusted(), }, "F3FEE248", "casalh w2, w2, w19, [x23]", @@ -7518,6 +7569,7 @@ fn test_aarch64_binemit() { rt: zero_reg(), rn: stack_reg(), ty: I32, + flags: MemFlags::trusted(), }, "FFFFE088", "casal w0, w0, wzr, [sp]", @@ -7529,6 +7581,7 @@ fn test_aarch64_binemit() { rt: xreg(15), rn: xreg(27), ty: I64, + flags: MemFlags::trusted(), }, "6FFFE7C8", "casal x7, x7, x15, [x27]", @@ -7536,6 +7589,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicCASLoop { ty: I8, + flags: MemFlags::trusted(), addr: xreg(25), expected: xreg(26), replacement: xreg(28), @@ -7549,6 +7603,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicCASLoop { ty: I16, + flags: MemFlags::trusted(), addr: xreg(25), expected: xreg(26), replacement: xreg(28), @@ -7562,6 +7617,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicCASLoop { ty: I32, + flags: MemFlags::trusted(), addr: xreg(25), 
expected: xreg(26), replacement: xreg(28), @@ -7575,6 +7631,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicCASLoop { ty: I64, + flags: MemFlags::trusted(), addr: xreg(25), expected: xreg(26), replacement: xreg(28), @@ -7590,6 +7647,7 @@ fn test_aarch64_binemit() { access_ty: I8, rt: writable_xreg(7), rn: xreg(28), + flags: MemFlags::trusted(), }, "87FFDF08", "ldarb w7, [x28]", @@ -7600,6 +7658,7 @@ fn test_aarch64_binemit() { access_ty: I16, rt: writable_xreg(2), rn: xreg(3), + flags: MemFlags::trusted(), }, "62FCDF48", "ldarh w2, [x3]", @@ -7610,6 +7669,7 @@ fn test_aarch64_binemit() { access_ty: I32, rt: writable_xreg(15), rn: xreg(0), + flags: MemFlags::trusted(), }, "0FFCDF88", "ldar w15, [x0]", @@ -7620,6 +7680,7 @@ fn test_aarch64_binemit() { access_ty: I64, rt: writable_xreg(28), rn: xreg(7), + flags: MemFlags::trusted(), }, "FCFCDFC8", "ldar x28, [x7]", @@ -7630,6 +7691,7 @@ fn test_aarch64_binemit() { access_ty: I8, rt: xreg(7), rn: xreg(28), + flags: MemFlags::trusted(), }, "87FF9F08", "stlrb w7, [x28]", @@ -7640,6 +7702,7 @@ fn test_aarch64_binemit() { access_ty: I16, rt: xreg(2), rn: xreg(3), + flags: MemFlags::trusted(), }, "62FC9F48", "stlrh w2, [x3]", @@ -7650,6 +7713,7 @@ fn test_aarch64_binemit() { access_ty: I32, rt: xreg(15), rn: xreg(0), + flags: MemFlags::trusted(), }, "0FFC9F88", "stlr w15, [x0]", @@ -7660,6 +7724,7 @@ fn test_aarch64_binemit() { access_ty: I64, rt: xreg(28), rn: xreg(7), + flags: MemFlags::trusted(), }, "FCFC9FC8", "stlr x28, [x7]", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 7ab47a72e7..22b034d4e2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1604,7 +1604,9 @@ impl Inst { let cond = cond.pretty_print(0, allocs); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } - &Inst::AtomicRMW { rs, rt, rn, ty, op } => { + &Inst::AtomicRMW { + rs, rt, rn, ty, op, .. 
+ } => { let op = match op { AtomicRMWOp::Add => "ldaddal", AtomicRMWOp::Clr => "ldclral", @@ -1637,6 +1639,7 @@ impl Inst { oldval, scratch1, scratch2, + .. } => { let op = match op { AtomicRMWLoopOp::Add => "add", @@ -1667,7 +1670,9 @@ impl Inst { scratch2, ) } - &Inst::AtomicCAS { rd, rs, rt, rn, ty } => { + &Inst::AtomicCAS { + rd, rs, rt, rn, ty, .. + } => { let op = match ty { I8 => "casalb", I16 => "casalh", @@ -1689,6 +1694,7 @@ impl Inst { replacement, oldval, scratch, + .. } => { let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs); let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs); diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index d820238a8e..4979fcb74d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -926,8 +926,9 @@ ;; Atomic loads will also automatically zero their upper bits so the `uextend` ;; instruction can effectively get skipped here. (rule (lower (has_type (fits_in_64 out) - (uextend (and (value_type in) (sinkable_atomic_load addr))))) - (load_acquire in (sink_atomic_load addr))) + (uextend x @ (and (value_type in) (atomic_load flags _))))) + (if-let mem_op (is_sinkable_inst x)) + (load_acquire in flags (sink_atomic_load mem_op))) ;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper ;; bits are all zero. 
@@ -1780,98 +1781,98 @@ ;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr))) - (load_acquire ty addr)) + (load_acquire ty flags addr)) ;;;; Rules for `AtomicStore` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) addr)) - (side_effect (store_release ty src addr))) + (side_effect (store_release ty flags src addr))) ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Add) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Add) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Add) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xor) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Or) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Set) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Set) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smax) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smin) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umax) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type 
(valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umin) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty)) + (lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty)) + (lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.And) addr src)))) - (lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty)) + (lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Add) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.And) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Or) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xor) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty)) + (atomic_rmw_loop 
(AtomicRMWLoopOp.Eor) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smin) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smax) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umin) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umax) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xchg) addr src))) - (atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty)) + (atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty flags)) ;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_cas flags addr src1 src2)))) - (lse_atomic_cas addr src1 src2 ty)) + (lse_atomic_cas addr src1 src2 ty flags)) (rule (lower (and (has_type (valid_atomic_transaction ty) (atomic_cas flags addr src1 src2)))) - (atomic_cas_loop addr src1 src2 ty)) + (atomic_cas_loop addr src1 src2 ty flags)) ;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (fvdemote x)) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 18d6cdd7e9..f8791d31c8 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -68,11 +68,6 @@ pub struct ExtendedValue { 
extend: ExtendOp, } -pub struct SinkableAtomicLoad { - atomic_load: Inst, - atomic_addr: Value, -} - impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { isle_prelude_method_helpers!(AArch64Caller); } @@ -366,25 +361,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } } - fn sinkable_atomic_load(&mut self, val: Value) -> Option<SinkableAtomicLoad> { - let input = self.lower_ctx.get_value_as_source_or_const(val); - if let InputSourceInst::UniqueUse(atomic_load, 0) = input.inst { - if self.lower_ctx.data(atomic_load).opcode() == Opcode::AtomicLoad { - let atomic_addr = self.lower_ctx.input_as_value(atomic_load, 0); - return Some(SinkableAtomicLoad { - atomic_load, - atomic_addr, - }); - } - } - None - } - - fn sink_atomic_load(&mut self, load: &SinkableAtomicLoad) -> Reg { - self.lower_ctx.sink_inst(load.atomic_load); - self.put_in_reg(load.atomic_addr) - } - fn shift_mask(&mut self, ty: Type) -> ImmLogic { debug_assert!(ty.lane_bits().is_power_of_two());