[AArch64] Port atomic rmw to ISLE (#4021)
Also fix and extend the current implementation:

- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Added more runtests.

Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -209,10 +209,8 @@
|
||||
;; effect of atomically modifying a memory location in a particular way. Because we have
|
||||
;; no way to explain to the regalloc about earlyclobber registers, this instruction has
|
||||
;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies
|
||||
;; in the surrounding code to the extent it can. The sequence is both preceded and
|
||||
;; followed by a fence which is at least as comprehensive as that of the `Fence`
|
||||
;; instruction below. This instruction is sequentially consistent. The operand
|
||||
;; conventions are:
|
||||
;; in the surrounding code to the extent it can. Load- and store-exclusive instructions,
|
||||
;; with acquire-release semantics, are used to access memory. The operand conventions are:
|
||||
;;
|
||||
;; x25 (rd) address
|
||||
;; x26 (rd) second operand for `op`
|
||||
@@ -221,28 +219,10 @@
|
||||
;; x28 (wr) scratch reg; value afterwards has no meaning
|
||||
(AtomicRMWLoop
|
||||
(ty Type) ;; I8, I16, I32 or I64
|
||||
(op AtomicRmwOp))
|
||||
|
||||
;; An atomic read-modify-write operation. These instructions require the
|
||||
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||
;; acquire-release semantics.
|
||||
(AtomicRMW
|
||||
(op AtomicRMWOp)
|
||||
(rs Reg)
|
||||
(rt WritableReg)
|
||||
(rn Reg)
|
||||
(ty Type))
|
||||
|
||||
;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
||||
(AtomicCAS
|
||||
(rs WritableReg)
|
||||
(rt Reg)
|
||||
(rn Reg)
|
||||
(ty Type))
|
||||
(op AtomicRMWLoopOp))
|
||||
|
||||
;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
|
||||
;; store-conditional loop.
|
||||
;; This instruction is sequentially consistent.
|
||||
;; store-conditional loop, with acquire-release semantics.
|
||||
;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different:
|
||||
;;
|
||||
;; x25 (rd) address
|
||||
@@ -254,6 +234,23 @@
|
||||
(ty Type) ;; I8, I16, I32 or I64
|
||||
)
|
||||
|
||||
;; An atomic read-modify-write operation. These instructions require the
|
||||
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||
;; acquire-release semantics.
|
||||
(AtomicRMW
|
||||
(op AtomicRMWOp)
|
||||
(rs Reg)
|
||||
(rt WritableReg)
|
||||
(rn Reg)
|
||||
(ty Type))
|
||||
|
||||
;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
||||
(AtomicCAS
|
||||
(rs WritableReg)
|
||||
(rt Reg)
|
||||
(rn Reg)
|
||||
(ty Type))
|
||||
|
||||
;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put
|
||||
;; it in `rn`, optionally zero-extending to fill a word or double word result.
|
||||
;; This instruction is sequentially consistent.
|
||||
@@ -1261,8 +1258,30 @@
|
||||
(Smin)
|
||||
(Umax)
|
||||
(Umin)
|
||||
(Swp)
|
||||
))
|
||||
|
||||
;; Atomic read-modify-write operations, with acquire-release semantics,
|
||||
;; implemented with a loop.
|
||||
(type AtomicRMWLoopOp
|
||||
(enum
|
||||
(Add)
|
||||
(Sub)
|
||||
(And)
|
||||
(Nand)
|
||||
(Eor)
|
||||
(Orr)
|
||||
(Smax)
|
||||
(Smin)
|
||||
(Umax)
|
||||
(Umin)
|
||||
(Xchg)
|
||||
))
|
||||
|
||||
;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(decl use_lse () Inst)
|
||||
(extern extractor use_lse use_lse)
|
||||
|
||||
;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl move_wide_const_from_u64 (MoveWideConst) u64)
|
||||
@@ -1304,6 +1323,9 @@
|
||||
(decl integral_ty (Type) Type)
|
||||
(extern extractor integral_ty integral_ty)
|
||||
|
||||
(decl valid_atomic_transaction (Type) Type)
|
||||
(extern extractor valid_atomic_transaction valid_atomic_transaction)
|
||||
|
||||
;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`.
|
||||
(decl imm12_from_value (Imm12) Value)
|
||||
(extractor
|
||||
@@ -1345,6 +1367,26 @@
|
||||
(decl writable_zero_reg () WritableReg)
|
||||
(extern constructor writable_zero_reg writable_zero_reg)
|
||||
|
||||
;; Helpers for getting a particular real register
|
||||
(decl xreg (u8) Reg)
|
||||
(extern constructor xreg xreg)
|
||||
|
||||
(decl writable_xreg (u8) WritableReg)
|
||||
(extern constructor writable_xreg writable_xreg)
|
||||
|
||||
;; Helpers for emitting 64-bit `MInst.Mov` instructions to and from a real register.
|
||||
(decl mov64_to_real (u8 Reg) Reg)
|
||||
(rule (mov64_to_real num src)
|
||||
(let ((dst WritableReg (writable_xreg num))
|
||||
(_ Unit (emit (MInst.Mov (operand_size $I64) dst src))))
|
||||
dst))
|
||||
|
||||
(decl mov64_from_real (u8) Reg)
|
||||
(rule (mov64_from_real num)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num)))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovZ` instructions.
|
||||
(decl movz (MoveWideConst OperandSize) Reg)
|
||||
(rule (movz imm size)
|
||||
@@ -2053,3 +2095,30 @@
|
||||
(decl cmeq0 (Reg VectorSize) Reg)
|
||||
(rule (cmeq0 rn size)
|
||||
(vec_misc (VecMisc2.Cmeq0) rn size))
|
||||
|
||||
;; Helper for emitting `MInst.AtomicRMW` instructions.
|
||||
(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type) Reg)
|
||||
(rule (lse_atomic_rmw op p r_arg2 ty)
|
||||
(let (
|
||||
(r_addr Reg p)
|
||||
(dst WritableReg (temp_writable_reg ty))
|
||||
(_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty)))
|
||||
)
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.AtomicRMWLoop` instructions.
|
||||
;; - Make sure that both args are in virtual regs, since in effect
|
||||
;; we have to do a parallel copy to get them safely to the AtomicRMW input
|
||||
;; regs, and that's not guaranteed safe if either is in a real reg.
|
||||
;; - Move the args to the preordained AtomicRMW input regs
|
||||
;; - And finally, copy the preordained AtomicRMW output reg to its destination.
|
||||
(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg)
|
||||
(rule (atomic_rmw_loop op p arg2 ty)
|
||||
(let (
|
||||
(v_addr Reg (ensure_in_vreg p $I64))
|
||||
(v_arg2 Reg (ensure_in_vreg arg2 $I64))
|
||||
(r_addr Reg (mov64_to_real 25 v_addr))
|
||||
(r_arg2 Reg (mov64_to_real 26 v_arg2))
|
||||
(_ Unit (emit (MInst.AtomicRMWLoop ty op)))
|
||||
)
|
||||
(mov64_from_real 27)))
|
||||
|
||||
@@ -583,6 +583,13 @@ impl OperandSize {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bits(&self) -> u8 {
|
||||
match self {
|
||||
OperandSize::Size32 => 32,
|
||||
OperandSize::Size64 => 64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert from an integer type into the smallest size that fits.
|
||||
pub fn from_ty(ty: Type) -> OperandSize {
|
||||
debug_assert!(!ty.is_vector());
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::ir::constant::ConstantData;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{LibCall, MemFlags, TrapCode};
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::is_valid_atomic_transaction_ty;
|
||||
use crate::machinst::{ty_bits, Reg, RegClass, Writable};
|
||||
use core::convert::TryFrom;
|
||||
|
||||
@@ -505,7 +506,7 @@ fn enc_dmb_ish() -> u32 {
|
||||
0xD5033BBF
|
||||
}
|
||||
|
||||
fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
assert!(machreg_to_gpr(rt.to_reg()) != 31);
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
@@ -514,6 +515,10 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let bit15 = match op {
|
||||
AtomicRMWOp::Swp => 0b1,
|
||||
_ => 0b0,
|
||||
};
|
||||
let op = match op {
|
||||
AtomicRMWOp::Add => 0b000,
|
||||
AtomicRMWOp::Clr => 0b001,
|
||||
@@ -523,10 +528,12 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
|
||||
AtomicRMWOp::Smin => 0b101,
|
||||
AtomicRMWOp::Umax => 0b110,
|
||||
AtomicRMWOp::Umin => 0b111,
|
||||
AtomicRMWOp::Swp => 0b000,
|
||||
};
|
||||
0b00_111_000_111_00000_0_000_00_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rs) << 16)
|
||||
| bit15 << 15
|
||||
| (op << 12)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rt.to_reg())
|
||||
@@ -1371,15 +1378,18 @@ impl MachInstEmit for Inst {
|
||||
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
|
||||
}
|
||||
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
|
||||
assert!(is_valid_atomic_transaction_ty(ty));
|
||||
let rs = allocs.next(rs);
|
||||
let rt = allocs.next_writable(rt);
|
||||
let rn = allocs.next(rn);
|
||||
sink.put4(enc_ldal(ty, op, rs, rt, rn));
|
||||
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
|
||||
}
|
||||
&Inst::AtomicRMWLoop { ty, op } => {
|
||||
assert!(is_valid_atomic_transaction_ty(ty));
|
||||
/* Emit this:
|
||||
again:
|
||||
ldaxr{,b,h} x/w27, [x25]
|
||||
// maybe sign extend
|
||||
op x28, x27, x26 // op is add,sub,and,orr,eor
|
||||
stlxr{,b,h} w24, x/w28, [x25]
|
||||
cbnz x24, again
|
||||
@@ -1414,10 +1424,31 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
|
||||
let size = OperandSize::from_ty(ty);
|
||||
let sign_ext = match op {
|
||||
AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
|
||||
I16 => Some((ExtendOp::SXTH, 16)),
|
||||
I8 => Some((ExtendOp::SXTB, 8)),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// sxt{b|h} the loaded result if necessary.
|
||||
if sign_ext.is_some() {
|
||||
let (_, from_bits) = sign_ext.unwrap();
|
||||
Inst::Extend {
|
||||
rd: x27wr,
|
||||
rn: x27,
|
||||
signed: true,
|
||||
from_bits,
|
||||
to_bits: size.bits(),
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
}
|
||||
|
||||
match op {
|
||||
AtomicRmwOp::Xchg => {} // do nothing
|
||||
AtomicRmwOp::Nand => {
|
||||
AtomicRMWLoopOp::Xchg => {} // do nothing
|
||||
AtomicRMWLoopOp::Nand => {
|
||||
// and x28, x27, x26
|
||||
// mvn x28, x28
|
||||
|
||||
@@ -1439,29 +1470,42 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
}
|
||||
AtomicRmwOp::Umin
|
||||
| AtomicRmwOp::Umax
|
||||
| AtomicRmwOp::Smin
|
||||
| AtomicRmwOp::Smax => {
|
||||
// cmp x27, x26
|
||||
AtomicRMWLoopOp::Umin
|
||||
| AtomicRMWLoopOp::Umax
|
||||
| AtomicRMWLoopOp::Smin
|
||||
| AtomicRMWLoopOp::Smax => {
|
||||
// cmp x27, x26 {?sxt}
|
||||
// csel.op x28, x27, x26
|
||||
|
||||
let cond = match op {
|
||||
AtomicRmwOp::Umin => Cond::Lo,
|
||||
AtomicRmwOp::Umax => Cond::Hi,
|
||||
AtomicRmwOp::Smin => Cond::Lt,
|
||||
AtomicRmwOp::Smax => Cond::Gt,
|
||||
AtomicRMWLoopOp::Umin => Cond::Lo,
|
||||
AtomicRMWLoopOp::Umax => Cond::Hi,
|
||||
AtomicRMWLoopOp::Smin => Cond::Lt,
|
||||
AtomicRMWLoopOp::Smax => Cond::Gt,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Inst::AluRRR {
|
||||
alu_op: ALUOp::SubS,
|
||||
size,
|
||||
rd: writable_zero_reg(),
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
if sign_ext.is_some() {
|
||||
let (extendop, _) = sign_ext.unwrap();
|
||||
Inst::AluRRRExtend {
|
||||
alu_op: ALUOp::SubS,
|
||||
size,
|
||||
rd: writable_zero_reg(),
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
extendop,
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
} else {
|
||||
Inst::AluRRR {
|
||||
alu_op: ALUOp::SubS,
|
||||
size,
|
||||
rd: writable_zero_reg(),
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
|
||||
Inst::CSel {
|
||||
cond,
|
||||
@@ -1474,17 +1518,17 @@ impl MachInstEmit for Inst {
|
||||
_ => {
|
||||
// add/sub/and/orr/eor x28, x27, x26
|
||||
let alu_op = match op {
|
||||
AtomicRmwOp::Add => ALUOp::Add,
|
||||
AtomicRmwOp::Sub => ALUOp::Sub,
|
||||
AtomicRmwOp::And => ALUOp::And,
|
||||
AtomicRmwOp::Or => ALUOp::Orr,
|
||||
AtomicRmwOp::Xor => ALUOp::Eor,
|
||||
AtomicRmwOp::Nand
|
||||
| AtomicRmwOp::Umin
|
||||
| AtomicRmwOp::Umax
|
||||
| AtomicRmwOp::Smin
|
||||
| AtomicRmwOp::Smax
|
||||
| AtomicRmwOp::Xchg => unreachable!(),
|
||||
AtomicRMWLoopOp::Add => ALUOp::Add,
|
||||
AtomicRMWLoopOp::Sub => ALUOp::Sub,
|
||||
AtomicRMWLoopOp::And => ALUOp::And,
|
||||
AtomicRMWLoopOp::Orr => ALUOp::Orr,
|
||||
AtomicRMWLoopOp::Eor => ALUOp::Eor,
|
||||
AtomicRMWLoopOp::Nand
|
||||
| AtomicRMWLoopOp::Umin
|
||||
| AtomicRMWLoopOp::Umax
|
||||
| AtomicRMWLoopOp::Smin
|
||||
| AtomicRMWLoopOp::Smax
|
||||
| AtomicRMWLoopOp::Xchg => unreachable!(),
|
||||
};
|
||||
|
||||
Inst::AluRRR {
|
||||
@@ -1502,7 +1546,7 @@ impl MachInstEmit for Inst {
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
if op == AtomicRmwOp::Xchg {
|
||||
if op == AtomicRMWLoopOp::Xchg {
|
||||
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
|
||||
} else {
|
||||
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||
|
||||
@@ -6205,10 +6205,18 @@ fn test_aarch64_binemit() {
|
||||
"frintn d23, d24",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: AtomicRMWLoopOp::Sub,
|
||||
},
|
||||
"3BFF5F087C031A4B3CFF1808B8FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Xor,
|
||||
op: AtomicRMWLoopOp::Eor,
|
||||
},
|
||||
"3BFF5F487C031A4A3CFF1848B8FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6216,7 +6224,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Add,
|
||||
op: AtomicRMWLoopOp::Add,
|
||||
},
|
||||
"3BFF5F087C031A0B3CFF1808B8FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6224,7 +6232,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Or,
|
||||
op: AtomicRMWLoopOp::Orr,
|
||||
},
|
||||
"3BFF5F887C031A2A3CFF1888B8FFFFB5",
|
||||
"1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6232,7 +6240,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I64,
|
||||
op: inst_common::AtomicRmwOp::And,
|
||||
op: AtomicRMWLoopOp::And,
|
||||
},
|
||||
"3BFF5FC87C031A8A3CFF18C8B8FFFFB5",
|
||||
"1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b",
|
||||
@@ -6240,7 +6248,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
op: AtomicRMWLoopOp::Xchg,
|
||||
},
|
||||
"3BFF5F083AFF1808D8FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b",
|
||||
@@ -6248,15 +6256,23 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Nand,
|
||||
op: AtomicRMWLoopOp::Nand,
|
||||
},
|
||||
"3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: AtomicRMWLoopOp::Smin,
|
||||
},
|
||||
"3BFF5F487B3F00137FA33A6B7CB39A9A3CFF184878FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Smin,
|
||||
op: AtomicRMWLoopOp::Smin,
|
||||
},
|
||||
"3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5",
|
||||
"1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6264,7 +6280,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I64,
|
||||
op: inst_common::AtomicRmwOp::Smax,
|
||||
op: AtomicRMWLoopOp::Smax,
|
||||
},
|
||||
"3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5",
|
||||
"1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b",
|
||||
@@ -6272,7 +6288,15 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Umin,
|
||||
op: AtomicRMWLoopOp::Smax,
|
||||
},
|
||||
"3BFF5F087B1F00137F833A6B7CC39A9A3CFF180878FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: AtomicRMWLoopOp::Umin,
|
||||
},
|
||||
"3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6280,7 +6304,7 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Umax,
|
||||
op: AtomicRMWLoopOp::Umax,
|
||||
},
|
||||
"3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
@@ -6638,6 +6662,50 @@ fn test_aarch64_binemit() {
|
||||
"7A73F9F8",
|
||||
"lduminal x25, x26, [x27]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Swp,
|
||||
rs: xreg(28),
|
||||
rt: writable_xreg(29),
|
||||
rn: xreg(30),
|
||||
},
|
||||
"DD83FC38",
|
||||
"swpalb w28, fp, [lr]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Swp,
|
||||
rs: xreg(0),
|
||||
rt: writable_xreg(1),
|
||||
rn: xreg(2),
|
||||
},
|
||||
"4180E078",
|
||||
"swpalh w0, w1, [x2]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Swp,
|
||||
rs: xreg(3),
|
||||
rt: writable_xreg(4),
|
||||
rn: xreg(5),
|
||||
},
|
||||
"A480E3B8",
|
||||
"swpal w3, w4, [x5]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Swp,
|
||||
rs: xreg(6),
|
||||
rt: writable_xreg(7),
|
||||
rn: xreg(8),
|
||||
},
|
||||
"0781E6F8",
|
||||
"swpal x6, x7, [x8]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicCAS {
|
||||
|
||||
@@ -39,9 +39,9 @@ mod emit_tests;
|
||||
// Instructions (top level): definition
|
||||
|
||||
pub use crate::isa::aarch64::lower::isle::generated_code::{
|
||||
ALUOp, ALUOp3, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp,
|
||||
IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp,
|
||||
VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
|
||||
ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode,
|
||||
FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2,
|
||||
VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
|
||||
};
|
||||
|
||||
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||
@@ -676,12 +676,14 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
||||
&Inst::CCmpImm { rn, .. } => {
|
||||
collector.reg_use(rn);
|
||||
}
|
||||
&Inst::AtomicRMWLoop { .. } => {
|
||||
&Inst::AtomicRMWLoop { op, .. } => {
|
||||
collector.reg_use(xreg(25));
|
||||
collector.reg_use(xreg(26));
|
||||
collector.reg_def(writable_xreg(24));
|
||||
collector.reg_def(writable_xreg(27));
|
||||
collector.reg_def(writable_xreg(28));
|
||||
if op != AtomicRMWLoopOp::Xchg {
|
||||
collector.reg_def(writable_xreg(28));
|
||||
}
|
||||
}
|
||||
&Inst::AtomicRMW { rs, rt, rn, .. } => {
|
||||
collector.reg_use(rs);
|
||||
@@ -1538,6 +1540,7 @@ impl Inst {
|
||||
AtomicRMWOp::Umax => "ldumaxal",
|
||||
AtomicRMWOp::Smin => "ldsminal",
|
||||
AtomicRMWOp::Umin => "lduminal",
|
||||
AtomicRMWOp::Swp => "swpal",
|
||||
};
|
||||
|
||||
let size = OperandSize::from_ty(ty);
|
||||
@@ -1569,28 +1572,39 @@ impl Inst {
|
||||
loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));
|
||||
|
||||
let op_str = match op {
|
||||
inst_common::AtomicRmwOp::Add => "add",
|
||||
inst_common::AtomicRmwOp::Sub => "sub",
|
||||
inst_common::AtomicRmwOp::Xor => "eor",
|
||||
inst_common::AtomicRmwOp::Or => "orr",
|
||||
inst_common::AtomicRmwOp::And => "and",
|
||||
AtomicRMWLoopOp::Add => "add",
|
||||
AtomicRMWLoopOp::Sub => "sub",
|
||||
AtomicRMWLoopOp::Eor => "eor",
|
||||
AtomicRMWLoopOp::Orr => "orr",
|
||||
AtomicRMWLoopOp::And => "and",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
if op_str.is_empty() {
|
||||
match op {
|
||||
inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2,
|
||||
inst_common::AtomicRmwOp::Nand => {
|
||||
AtomicRMWLoopOp::Xchg => r_dst = r_arg2,
|
||||
AtomicRMWLoopOp::Nand => {
|
||||
loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
|
||||
loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
|
||||
}
|
||||
_ => {
|
||||
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
|
||||
if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax)
|
||||
&& (ty == I8 || ty == I16)
|
||||
{
|
||||
loop_str
|
||||
.push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp));
|
||||
loop_str.push_str(&format!(
|
||||
"cmp {}, {}, sxt{}; ",
|
||||
r_tmp, r_arg2, ty_suffix
|
||||
));
|
||||
} else {
|
||||
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
|
||||
}
|
||||
let cond = match op {
|
||||
inst_common::AtomicRmwOp::Smin => "lt",
|
||||
inst_common::AtomicRmwOp::Smax => "gt",
|
||||
inst_common::AtomicRmwOp::Umin => "lo",
|
||||
inst_common::AtomicRmwOp::Umax => "hi",
|
||||
AtomicRMWLoopOp::Smin => "lt",
|
||||
AtomicRMWLoopOp::Smax => "gt",
|
||||
AtomicRMWLoopOp::Umin => "lo",
|
||||
AtomicRMWLoopOp::Umax => "hi",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
loop_str.push_str(&format!(
|
||||
|
||||
@@ -1196,3 +1196,77 @@
|
||||
(let ((rn Reg y)
|
||||
(vec_size VectorSize (vector_size ty)))
|
||||
(value_reg (int_cmp_zero_swap cond rn vec_size))))
|
||||
|
||||
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Add) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Or) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
|
||||
(rule (lower (and (use_lse)
|
||||
(has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.And) addr src))))
|
||||
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
|
||||
|
||||
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Add) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Sub) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.And) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Nand) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Or) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Xor) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Smin) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Smax) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Umin) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Umax) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty))
|
||||
(rule (lower (has_type (valid_atomic_transaction ty)
|
||||
(atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
|
||||
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
|
||||
|
||||
@@ -5,8 +5,8 @@ pub mod generated_code;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{
|
||||
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
|
||||
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
|
||||
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
|
||||
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
|
||||
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
|
||||
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
|
||||
NZCV,
|
||||
@@ -17,10 +17,11 @@ use crate::settings::Flags;
|
||||
use crate::{
|
||||
binemit::CodeOffset,
|
||||
ir::{
|
||||
immediates::*, types::*, ExternalName, Inst, InstructionData, MemFlags, TrapCode, Value,
|
||||
ValueList,
|
||||
immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
|
||||
TrapCode, Value, ValueList,
|
||||
},
|
||||
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
|
||||
isa::aarch64::lower::{is_valid_atomic_transaction_ty, writable_xreg, xreg},
|
||||
isa::unwind::UnwindInst,
|
||||
machinst::{ty_bits, InsnOutput, LowerCtx},
|
||||
};
|
||||
@@ -66,6 +67,14 @@ where
|
||||
{
|
||||
isle_prelude_methods!();
|
||||
|
||||
fn use_lse(&mut self, _: Inst) -> Option<()> {
|
||||
if self.isa_flags.use_lse() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn move_wide_const_from_u64(&mut self, n: u64) -> Option<MoveWideConst> {
|
||||
MoveWideConst::maybe_from_u64(n)
|
||||
}
|
||||
@@ -114,6 +123,14 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn valid_atomic_transaction(&mut self, ty: Type) -> Option<Type> {
|
||||
if is_valid_atomic_transaction_ty(ty) {
|
||||
Some(ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the fallback case for loading a 64-bit integral constant into a
|
||||
/// register.
|
||||
///
|
||||
@@ -194,6 +211,14 @@ where
|
||||
zero_reg()
|
||||
}
|
||||
|
||||
fn xreg(&mut self, index: u8) -> Reg {
|
||||
xreg(index)
|
||||
}
|
||||
|
||||
fn writable_xreg(&mut self, index: u8) -> WritableReg {
|
||||
writable_xreg(index)
|
||||
}
|
||||
|
||||
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
|
||||
let (val, extend) =
|
||||
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 443b34b797fc8ace
|
||||
src/prelude.isle d8a93eb727abd7f4
|
||||
src/isa/aarch64/inst.isle 77984cc33a05be7
|
||||
src/isa/aarch64/lower.isle 71c7e603b0e4bdef
|
||||
src/prelude.isle a7915a6b88310eb5
|
||||
src/isa/aarch64/inst.isle a2c0ae729bfa24a8
|
||||
src/isa/aarch64/lower.isle 15641ca7f0ac061a
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -237,48 +237,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(inst);
|
||||
}
|
||||
|
||||
Opcode::AtomicRmw => {
|
||||
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
|
||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||
let lse_op = match op {
|
||||
AtomicRmwOp::Add => Some(AtomicRMWOp::Add),
|
||||
AtomicRmwOp::And => Some(AtomicRMWOp::Clr),
|
||||
AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor),
|
||||
AtomicRmwOp::Or => Some(AtomicRMWOp::Set),
|
||||
AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax),
|
||||
AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax),
|
||||
AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin),
|
||||
AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin),
|
||||
_ => None,
|
||||
};
|
||||
if isa_flags.use_lse() && lse_op.is_some() {
|
||||
ctx.emit(Inst::AtomicRMW {
|
||||
op: lse_op.unwrap(),
|
||||
rs: r_arg2,
|
||||
rt: r_dst,
|
||||
rn: r_addr,
|
||||
ty: ty_access,
|
||||
});
|
||||
} else {
|
||||
// Make sure that both args are in virtual regs, since in effect
|
||||
// we have to do a parallel copy to get them safely to the AtomicRMW input
|
||||
// regs, and that's not guaranteed safe if either is in a real reg.
|
||||
r_addr = ctx.ensure_in_vreg(r_addr, I64);
|
||||
r_arg2 = ctx.ensure_in_vreg(r_arg2, I64);
|
||||
// Move the args to the preordained AtomicRMW input regs
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
||||
ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op });
|
||||
// And finally, copy the preordained AtomicRMW output reg to its destination.
|
||||
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
||||
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
||||
}
|
||||
}
|
||||
Opcode::AtomicRmw => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::AtomicCas => {
|
||||
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user