[AArch64] Port atomic rmw to ISLE (#4021)

Also fix and extend the current implementation:
- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be
  inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when
  needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Added more runtests.
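For the first bullet: LSE's LDCLR instruction performs a bit-clear, computing old & !operand, so mapping CLIF's `atomic_rmw.and` straight onto Clr computes the wrong value unless the operand is inverted first. A plain-integer sketch of the bug and the fix (a model, not the lowering code itself):

    // LDCLR (AtomicRMWOp::Clr) computes old & !operand, not old & operand.
    fn main() {
        let (old, v): (u64, u64) = (0b1100, 0b1010);
        let clr = |operand: u64| old & !operand;
        assert_ne!(clr(v), old & v);  // naive Clr(v) is not And(v)
        assert_eq!(clr(!v), old & v); // inverting the input first makes it And(v)
    }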

Copyright (c) 2022, Arm Limited.
Author: Sam Parker
Date: 2022-04-27 21:13:59 +01:00
Committed by: GitHub
Parent: 8381179503
Commit: 12b4374cd5
26 changed files with 1632 additions and 1281 deletions


@@ -209,10 +209,8 @@
;; effect of atomically modifying a memory location in a particular way. Because we have
;; no way to explain to the regalloc about earlyclobber registers, this instruction has
;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies
-;; in the surrounding code to the extent it can. The sequence is both preceded and
-;; followed by a fence which is at least as comprehensive as that of the `Fence`
-;; instruction below. This instruction is sequentially consistent. The operand
-;; conventions are:
+;; in the surrounding code to the extent it can. Load- and store-exclusive instructions,
+;; with acquire-release semantics, are used to access memory. The operand conventions are:
;;
;; x25 (rd) address
;; x26 (rd) second operand for `op`
@@ -221,28 +219,10 @@
;; x28 (wr) scratch reg; value afterwards has no meaning
(AtomicRMWLoop
(ty Type) ;; I8, I16, I32 or I64
-    (op AtomicRmwOp))
-  ;; An atomic read-modify-write operation. These instructions require the
-  ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
-  ;; acquire-release semantics.
-  (AtomicRMW
-    (op AtomicRMWOp)
-    (rs Reg)
-    (rt WritableReg)
-    (rn Reg)
-    (ty Type))
-  ;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
-  (AtomicCAS
-    (rs WritableReg)
-    (rt Reg)
-    (rn Reg)
-    (ty Type))
+    (op AtomicRMWLoopOp))
;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
-;; store-conditional loop.
-;; This instruction is sequentially consistent.
+;; store-conditional loop, with acquire-release semantics.
;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different:
;;
;; x25 (rd) address
@@ -254,6 +234,23 @@
(ty Type) ;; I8, I16, I32 or I64
)
;; An atomic read-modify-write operation. These instructions require the
;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
;; acquire-release semantics.
(AtomicRMW
(op AtomicRMWOp)
(rs Reg)
(rt WritableReg)
(rn Reg)
(ty Type))
;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
(AtomicCAS
(rs WritableReg)
(rt Reg)
(rn Reg)
(ty Type))
;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put
;; it in `rn`, optionally zero-extending to fill a word or double word result.
;; This instruction is sequentially consistent.
@@ -1261,8 +1258,30 @@
(Smin)
(Umax)
(Umin)
(Swp)
))
;; Atomic read-modify-write operations, with acquire-release semantics,
;; implemented with a loop.
(type AtomicRMWLoopOp
(enum
(Add)
(Sub)
(And)
(Nand)
(Eor)
(Orr)
(Smax)
(Smin)
(Umax)
(Umin)
(Xchg)
))
;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl use_lse () Inst)
(extern extractor use_lse use_lse)
;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;
(decl move_wide_const_from_u64 (MoveWideConst) u64)
@@ -1304,6 +1323,9 @@
(decl integral_ty (Type) Type)
(extern extractor integral_ty integral_ty)
(decl valid_atomic_transaction (Type) Type)
(extern extractor valid_atomic_transaction valid_atomic_transaction)
;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`.
(decl imm12_from_value (Imm12) Value)
(extractor
@@ -1345,6 +1367,26 @@
(decl writable_zero_reg () WritableReg)
(extern constructor writable_zero_reg writable_zero_reg)
;; Helpers for getting a particular real register
(decl xreg (u8) Reg)
(extern constructor xreg xreg)
(decl writable_xreg (u8) WritableReg)
(extern constructor writable_xreg writable_xreg)
;; Helper for emitting `MInst.Mov64` instructions.
(decl mov64_to_real (u8 Reg) Reg)
(rule (mov64_to_real num src)
(let ((dst WritableReg (writable_xreg num))
(_ Unit (emit (MInst.Mov (operand_size $I64) dst src))))
dst))
(decl mov64_from_real (u8) Reg)
(rule (mov64_from_real num)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Mov (operand_size $I64) dst (xreg num)))))
dst))
;; Helper for emitting `MInst.MovZ` instructions.
(decl movz (MoveWideConst OperandSize) Reg)
(rule (movz imm size)
@@ -2053,3 +2095,30 @@
(decl cmeq0 (Reg VectorSize) Reg)
(rule (cmeq0 rn size)
(vec_misc (VecMisc2.Cmeq0) rn size))
;; Helper for emitting `MInst.AtomicRMW` instructions.
(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type) Reg)
(rule (lse_atomic_rmw op p r_arg2 ty)
(let (
(r_addr Reg p)
(dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty)))
)
dst))
;; Helper for emitting `MInst.AtomicRMWLoop` instructions.
;; - Make sure that both args are in virtual regs, since in effect
;; we have to do a parallel copy to get them safely to the AtomicRMW input
;; regs, and that's not guaranteed safe if either is in a real reg.
;; - Move the args to the preordained AtomicRMW input regs
;; - And finally, copy the preordained AtomicRMW output reg to its destination.
(decl atomic_rmw_loop (AtomicRMWLoopOp Value Value Type) Reg)
(rule (atomic_rmw_loop op p arg2 ty)
(let (
(v_addr Reg (ensure_in_vreg p $I64))
(v_arg2 Reg (ensure_in_vreg arg2 $I64))
(r_addr Reg (mov64_to_real 25 v_addr))
(r_arg2 Reg (mov64_to_real 26 v_arg2))
(_ Unit (emit (MInst.AtomicRMWLoop ty op)))
)
(mov64_from_real 27)))
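For intuition, the sequence these helpers emit behaves like the following read-modify-write loop. This is only a model: `AtomicU32` stands in for the memory addressed by x25, the closure stands in for the ALU op on x26, a compare-exchange loop stands in for the actual ldaxr/stlxr pair, and the returned old value is what ends up in x27.

    use std::sync::atomic::{AtomicU32, Ordering};

    // Model of AtomicRMWLoop: load-exclusive, apply the op, store-exclusive,
    // retry until the store succeeds; the old value is the result.
    fn atomic_rmw_loop(mem: &AtomicU32, op: impl Fn(u32) -> u32) -> u32 {
        loop {
            let old = mem.load(Ordering::Acquire); // ldaxr{,b,h} x/w27, [x25]
            let new = op(old);                     // e.g. add x28, x27, x26
            // stlxr{,b,h} w24, x/w28, [x25]; cbnz w24, again
            if mem
                .compare_exchange_weak(old, new, Ordering::AcqRel, Ordering::Acquire)
                .is_ok()
            {
                return old;
            }
        }
    }

    fn main() {
        let mem = AtomicU32::new(10);
        assert_eq!(atomic_rmw_loop(&mem, |x| x.wrapping_sub(3)), 10);
        assert_eq!(mem.load(Ordering::Relaxed), 7);
    }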


@@ -583,6 +583,13 @@ impl OperandSize {
}
}
pub fn bits(&self) -> u8 {
match self {
OperandSize::Size32 => 32,
OperandSize::Size64 => 64,
}
}
/// Convert from an integer type into the smallest size that fits.
pub fn from_ty(ty: Type) -> OperandSize {
debug_assert!(!ty.is_vector());


@@ -7,6 +7,7 @@ use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::is_valid_atomic_transaction_ty;
use crate::machinst::{ty_bits, Reg, RegClass, Writable};
use core::convert::TryFrom;
@@ -505,7 +506,7 @@ fn enc_dmb_ish() -> u32 {
0xD5033BBF
}
-fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
+fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
assert!(machreg_to_gpr(rt.to_reg()) != 31);
let sz = match ty {
I64 => 0b11,
@@ -514,6 +515,10 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
I8 => 0b00,
_ => unreachable!(),
};
let bit15 = match op {
AtomicRMWOp::Swp => 0b1,
_ => 0b0,
};
let op = match op {
AtomicRMWOp::Add => 0b000,
AtomicRMWOp::Clr => 0b001,
@@ -523,10 +528,12 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
AtomicRMWOp::Smin => 0b101,
AtomicRMWOp::Umax => 0b110,
AtomicRMWOp::Umin => 0b111,
AtomicRMWOp::Swp => 0b000,
};
0b00_111_000_111_00000_0_000_00_00000_00000
| (sz << 30)
| (machreg_to_gpr(rs) << 16)
| bit15 << 15
| (op << 12)
| (machreg_to_gpr(rn) << 5)
| machreg_to_gpr(rt.to_reg())
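As a sanity check on the field layout above, this standalone sketch recomputes the `swpal x6, x7, [x8]` encoding that appears in the emit tests further down ("0781E6F8" is the instruction word in little-endian byte order):

    // swpal x6, x7, [x8]: sz=0b11 (I64), rs=x6, bit15=1 (SWP), op=0b000, rn=x8, rt=x7.
    fn main() {
        let (sz, rs, bit15, op, rn, rt): (u32, u32, u32, u32, u32, u32) =
            (0b11, 6, 1, 0b000, 8, 7);
        let enc = 0b00_111_000_111_00000_0_000_00_00000_00000u32
            | (sz << 30)
            | (rs << 16)
            | (bit15 << 15)
            | (op << 12)
            | (rn << 5)
            | rt;
        assert_eq!(enc, 0xF8E68107);
        assert_eq!(enc.to_le_bytes(), [0x07, 0x81, 0xE6, 0xF8]); // "0781E6F8"
    }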
@@ -1371,15 +1378,18 @@ impl MachInstEmit for Inst {
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
}
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
assert!(is_valid_atomic_transaction_ty(ty));
let rs = allocs.next(rs);
let rt = allocs.next_writable(rt);
let rn = allocs.next(rn);
-                sink.put4(enc_ldal(ty, op, rs, rt, rn));
+                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
}
&Inst::AtomicRMWLoop { ty, op } => {
assert!(is_valid_atomic_transaction_ty(ty));
/* Emit this:
again:
ldaxr{,b,h} x/w27, [x25]
// maybe sign extend
op x28, x27, x26 // op is add,sub,and,orr,eor
stlxr{,b,h} w24, x/w28, [x25]
cbnz x24, again
@@ -1414,10 +1424,31 @@ impl MachInstEmit for Inst {
}
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
let size = OperandSize::from_ty(ty);
let sign_ext = match op {
AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
I16 => Some((ExtendOp::SXTH, 16)),
I8 => Some((ExtendOp::SXTB, 8)),
_ => None,
},
_ => None,
};
// sxt{b|h} the loaded result if necessary.
if sign_ext.is_some() {
let (_, from_bits) = sign_ext.unwrap();
Inst::Extend {
rd: x27wr,
rn: x27,
signed: true,
from_bits,
to_bits: size.bits(),
}
.emit(&[], sink, emit_info, state);
}
match op {
-                    AtomicRmwOp::Xchg => {} // do nothing
-                    AtomicRmwOp::Nand => {
+                    AtomicRMWLoopOp::Xchg => {} // do nothing
+                    AtomicRMWLoopOp::Nand => {
// and x28, x27, x26
// mvn x28, x28
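The sign-extension step above matters because ldaxrb/ldaxrh zero-extend the loaded value into the 32-bit register, which would make the signed min/max comparison see the wrong value. A small sketch of the failure mode the sxt{b,h} avoids:

    fn main() {
        let loaded: u32 = 0xFF; // what ldaxrb leaves in w27 (zero-extended)
        let arg2: u32 = 0x01;   // the operand in w26
        assert!(loaded > arg2); // unsigned view: 255 > 1, wrong for smin
        let (a, b) = (loaded as u8 as i8 as i32, arg2 as u8 as i8 as i32);
        assert!(a < b);         // after sign extension: -1 < 1, as smin requires
    }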
@@ -1439,29 +1470,42 @@ impl MachInstEmit for Inst {
}
.emit(&[], sink, emit_info, state);
}
-                    AtomicRmwOp::Umin
-                    | AtomicRmwOp::Umax
-                    | AtomicRmwOp::Smin
-                    | AtomicRmwOp::Smax => {
-                        // cmp x27, x26
+                    AtomicRMWLoopOp::Umin
+                    | AtomicRMWLoopOp::Umax
+                    | AtomicRMWLoopOp::Smin
+                    | AtomicRMWLoopOp::Smax => {
+                        // cmp x27, x26 {?sxt}
// csel.op x28, x27, x26
let cond = match op {
-                            AtomicRmwOp::Umin => Cond::Lo,
-                            AtomicRmwOp::Umax => Cond::Hi,
-                            AtomicRmwOp::Smin => Cond::Lt,
-                            AtomicRmwOp::Smax => Cond::Gt,
+                            AtomicRMWLoopOp::Umin => Cond::Lo,
+                            AtomicRMWLoopOp::Umax => Cond::Hi,
+                            AtomicRMWLoopOp::Smin => Cond::Lt,
+                            AtomicRMWLoopOp::Smax => Cond::Gt,
_ => unreachable!(),
};
-                        Inst::AluRRR {
-                            alu_op: ALUOp::SubS,
-                            size,
-                            rd: writable_zero_reg(),
-                            rn: x27,
-                            rm: x26,
-                        }
-                        .emit(&[], sink, emit_info, state);
+                        if sign_ext.is_some() {
+                            let (extendop, _) = sign_ext.unwrap();
+                            Inst::AluRRRExtend {
+                                alu_op: ALUOp::SubS,
+                                size,
+                                rd: writable_zero_reg(),
+                                rn: x27,
+                                rm: x26,
+                                extendop,
+                            }
+                            .emit(&[], sink, emit_info, state);
+                        } else {
+                            Inst::AluRRR {
+                                alu_op: ALUOp::SubS,
+                                size,
+                                rd: writable_zero_reg(),
+                                rn: x27,
+                                rm: x26,
+                            }
+                            .emit(&[], sink, emit_info, state);
+                        }
Inst::CSel {
cond,
@@ -1474,17 +1518,17 @@ impl MachInstEmit for Inst {
_ => {
// add/sub/and/orr/eor x28, x27, x26
let alu_op = match op {
-                            AtomicRmwOp::Add => ALUOp::Add,
-                            AtomicRmwOp::Sub => ALUOp::Sub,
-                            AtomicRmwOp::And => ALUOp::And,
-                            AtomicRmwOp::Or => ALUOp::Orr,
-                            AtomicRmwOp::Xor => ALUOp::Eor,
-                            AtomicRmwOp::Nand
-                            | AtomicRmwOp::Umin
-                            | AtomicRmwOp::Umax
-                            | AtomicRmwOp::Smin
-                            | AtomicRmwOp::Smax
-                            | AtomicRmwOp::Xchg => unreachable!(),
+                            AtomicRMWLoopOp::Add => ALUOp::Add,
+                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
+                            AtomicRMWLoopOp::And => ALUOp::And,
+                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
+                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
+                            AtomicRMWLoopOp::Nand
+                            | AtomicRMWLoopOp::Umin
+                            | AtomicRMWLoopOp::Umax
+                            | AtomicRMWLoopOp::Smin
+                            | AtomicRMWLoopOp::Smax
+                            | AtomicRMWLoopOp::Xchg => unreachable!(),
};
Inst::AluRRR {
@@ -1502,7 +1546,7 @@ impl MachInstEmit for Inst {
if srcloc != SourceLoc::default() {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
-                if op == AtomicRmwOp::Xchg {
+                if op == AtomicRMWLoopOp::Xchg {
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
} else {
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]


@@ -6205,10 +6205,18 @@ fn test_aarch64_binemit() {
"frintn d23, d24",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Sub,
},
"3BFF5F087C031A4B3CFF1808B8FFFFB5",
"1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
-            op: inst_common::AtomicRmwOp::Xor,
+            op: AtomicRMWLoopOp::Eor,
},
"3BFF5F487C031A4A3CFF1848B8FFFFB5",
"1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6216,7 +6224,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
-            op: inst_common::AtomicRmwOp::Add,
+            op: AtomicRMWLoopOp::Add,
},
"3BFF5F087C031A0B3CFF1808B8FFFFB5",
"1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6224,7 +6232,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I32,
-            op: inst_common::AtomicRmwOp::Or,
+            op: AtomicRMWLoopOp::Orr,
},
"3BFF5F887C031A2A3CFF1888B8FFFFB5",
"1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6232,7 +6240,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I64,
-            op: inst_common::AtomicRmwOp::And,
+            op: AtomicRMWLoopOp::And,
},
"3BFF5FC87C031A8A3CFF18C8B8FFFFB5",
"1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6240,7 +6248,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
-            op: inst_common::AtomicRmwOp::Xchg,
+            op: AtomicRMWLoopOp::Xchg,
},
"3BFF5F083AFF1808D8FFFFB5",
"1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b",
@@ -6248,15 +6256,23 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
-            op: inst_common::AtomicRmwOp::Nand,
+            op: AtomicRMWLoopOp::Nand,
},
"3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5",
"1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Smin,
},
"3BFF5F487B3F00137FA33A6B7CB39A9A3CFF184878FFFFB5",
"1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I32,
-            op: inst_common::AtomicRmwOp::Smin,
+            op: AtomicRMWLoopOp::Smin,
},
"3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5",
"1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6264,7 +6280,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I64,
-            op: inst_common::AtomicRmwOp::Smax,
+            op: AtomicRMWLoopOp::Smax,
},
"3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5",
"1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6272,7 +6288,15 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
-            op: inst_common::AtomicRmwOp::Umin,
+            op: AtomicRMWLoopOp::Smax,
},
"3BFF5F087B1F00137F833A6B7CC39A9A3CFF180878FFFFB5",
"1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Umin,
},
"3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5",
"1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6280,7 +6304,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: inst_common::AtomicRmwOp::Umax,
op: AtomicRMWLoopOp::Umax,
},
"3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5",
"1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6638,6 +6662,50 @@ fn test_aarch64_binemit() {
"7A73F9F8",
"lduminal x25, x26, [x27]",
));
insns.push((
Inst::AtomicRMW {
ty: I8,
op: AtomicRMWOp::Swp,
rs: xreg(28),
rt: writable_xreg(29),
rn: xreg(30),
},
"DD83FC38",
"swpalb w28, fp, [lr]",
));
insns.push((
Inst::AtomicRMW {
ty: I16,
op: AtomicRMWOp::Swp,
rs: xreg(0),
rt: writable_xreg(1),
rn: xreg(2),
},
"4180E078",
"swpalh w0, w1, [x2]",
));
insns.push((
Inst::AtomicRMW {
ty: I32,
op: AtomicRMWOp::Swp,
rs: xreg(3),
rt: writable_xreg(4),
rn: xreg(5),
},
"A480E3B8",
"swpal w3, w4, [x5]",
));
insns.push((
Inst::AtomicRMW {
ty: I64,
op: AtomicRMWOp::Swp,
rs: xreg(6),
rt: writable_xreg(7),
rn: xreg(8),
},
"0781E6F8",
"swpal x6, x7, [x8]",
));
insns.push((
Inst::AtomicCAS {


@@ -39,9 +39,9 @@ mod emit_tests;
// Instructions (top level): definition
pub use crate::isa::aarch64::lower::isle::generated_code::{
-    ALUOp, ALUOp3, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp,
-    IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp,
-    VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
+    ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode,
+    FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2,
+    VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
};
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -676,12 +676,14 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
&Inst::CCmpImm { rn, .. } => {
collector.reg_use(rn);
}
-        &Inst::AtomicRMWLoop { .. } => {
+        &Inst::AtomicRMWLoop { op, .. } => {
collector.reg_use(xreg(25));
collector.reg_use(xreg(26));
collector.reg_def(writable_xreg(24));
collector.reg_def(writable_xreg(27));
-            collector.reg_def(writable_xreg(28));
+            if op != AtomicRMWLoopOp::Xchg {
+                collector.reg_def(writable_xreg(28));
+            }
}
&Inst::AtomicRMW { rs, rt, rn, .. } => {
collector.reg_use(rs);
@@ -1538,6 +1540,7 @@ impl Inst {
AtomicRMWOp::Umax => "ldumaxal",
AtomicRMWOp::Smin => "ldsminal",
AtomicRMWOp::Umin => "lduminal",
AtomicRMWOp::Swp => "swpal",
};
let size = OperandSize::from_ty(ty);
@@ -1569,28 +1572,39 @@ impl Inst {
loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));
let op_str = match op {
-                    inst_common::AtomicRmwOp::Add => "add",
-                    inst_common::AtomicRmwOp::Sub => "sub",
-                    inst_common::AtomicRmwOp::Xor => "eor",
-                    inst_common::AtomicRmwOp::Or => "orr",
-                    inst_common::AtomicRmwOp::And => "and",
+                    AtomicRMWLoopOp::Add => "add",
+                    AtomicRMWLoopOp::Sub => "sub",
+                    AtomicRMWLoopOp::Eor => "eor",
+                    AtomicRMWLoopOp::Orr => "orr",
+                    AtomicRMWLoopOp::And => "and",
_ => "",
};
if op_str.is_empty() {
match op {
-                        inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2,
-                        inst_common::AtomicRmwOp::Nand => {
+                        AtomicRMWLoopOp::Xchg => r_dst = r_arg2,
+                        AtomicRMWLoopOp::Nand => {
loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
}
_ => {
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax)
&& (ty == I8 || ty == I16)
{
loop_str
.push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp));
loop_str.push_str(&format!(
"cmp {}, {}, sxt{}; ",
r_tmp, r_arg2, ty_suffix
));
} else {
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
}
let cond = match op {
-                                inst_common::AtomicRmwOp::Smin => "lt",
-                                inst_common::AtomicRmwOp::Smax => "gt",
-                                inst_common::AtomicRmwOp::Umin => "lo",
-                                inst_common::AtomicRmwOp::Umax => "hi",
+                                AtomicRMWLoopOp::Smin => "lt",
+                                AtomicRMWLoopOp::Smax => "gt",
+                                AtomicRMWLoopOp::Umin => "lo",
+                                AtomicRMWLoopOp::Umax => "hi",
_ => unreachable!(),
};
loop_str.push_str(&format!(


@@ -1196,3 +1196,77 @@
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (int_cmp_zero_swap cond rn vec_size))))
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Add) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Or) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
(rule (lower (and (use_lse)
(has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.And) addr src))))
(lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Add) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Sub) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.And) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Nand) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Or) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xor) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smin) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Smax) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umin) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Umax) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty))
(rule (lower (has_type (valid_atomic_transaction ty)
(atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
(atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
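Two of the LSE rules above synthesize their operand transforms with the zero register: `(sub ty (zero_reg) src)` computes 0 - src, i.e. negation for the Sub-as-Add rule, and `(eon ty src (zero_reg))` computes src ^ !0, i.e. bitwise NOT for the And-as-Clr rule (Clr applies old & !operand). A quick check of both identities:

    fn main() {
        let src: u64 = 0xDEAD_BEEF;
        // sub with the zero register: 0 - src == -src, so Add(-src) is Sub(src).
        assert_eq!(0u64.wrapping_sub(src), src.wrapping_neg());
        // eon with the zero register: src ^ !0 == !src, the inverted input Clr needs.
        assert_eq!(src ^ !0u64, !src);
    }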


@@ -5,8 +5,8 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
-    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
-    CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
+    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
+    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
@@ -17,10 +17,11 @@ use crate::settings::Flags;
use crate::{
binemit::CodeOffset,
ir::{
-        immediates::*, types::*, ExternalName, Inst, InstructionData, MemFlags, TrapCode, Value,
-        ValueList,
+        immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
+        TrapCode, Value, ValueList,
},
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::aarch64::lower::{is_valid_atomic_transaction_ty, writable_xreg, xreg},
isa::unwind::UnwindInst,
machinst::{ty_bits, InsnOutput, LowerCtx},
};
@@ -66,6 +67,14 @@ where
{
isle_prelude_methods!();
fn use_lse(&mut self, _: Inst) -> Option<()> {
if self.isa_flags.use_lse() {
Some(())
} else {
None
}
}
fn move_wide_const_from_u64(&mut self, n: u64) -> Option<MoveWideConst> {
MoveWideConst::maybe_from_u64(n)
}
@@ -114,6 +123,14 @@ where
}
}
fn valid_atomic_transaction(&mut self, ty: Type) -> Option<Type> {
if is_valid_atomic_transaction_ty(ty) {
Some(ty)
} else {
None
}
}
/// This is the fallback case for loading a 64-bit integral constant into a
/// register.
///
@@ -194,6 +211,14 @@ where
zero_reg()
}
fn xreg(&mut self, index: u8) -> Reg {
xreg(index)
}
fn writable_xreg(&mut self, index: u8) -> WritableReg {
writable_xreg(index)
}
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
let (val, extend) =
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;


@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace
-src/prelude.isle d8a93eb727abd7f4
-src/isa/aarch64/inst.isle 77984cc33a05be7
-src/isa/aarch64/lower.isle 71c7e603b0e4bdef
+src/prelude.isle a7915a6b88310eb5
+src/isa/aarch64/inst.isle a2c0ae729bfa24a8
+src/isa/aarch64/lower.isle 15641ca7f0ac061a

File diff suppressed because it is too large.


@@ -237,48 +237,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(inst);
}
-        Opcode::AtomicRmw => {
-            let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            let ty_access = ty.unwrap();
-            assert!(is_valid_atomic_transaction_ty(ty_access));
-            let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
-            let lse_op = match op {
-                AtomicRmwOp::Add => Some(AtomicRMWOp::Add),
-                AtomicRmwOp::And => Some(AtomicRMWOp::Clr),
-                AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor),
-                AtomicRmwOp::Or => Some(AtomicRMWOp::Set),
-                AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax),
-                AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax),
-                AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin),
-                AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin),
-                _ => None,
-            };
-            if isa_flags.use_lse() && lse_op.is_some() {
-                ctx.emit(Inst::AtomicRMW {
-                    op: lse_op.unwrap(),
-                    rs: r_arg2,
-                    rt: r_dst,
-                    rn: r_addr,
-                    ty: ty_access,
-                });
-            } else {
-                // Make sure that both args are in virtual regs, since in effect
-                // we have to do a parallel copy to get them safely to the AtomicRMW input
-                // regs, and that's not guaranteed safe if either is in a real reg.
-                r_addr = ctx.ensure_in_vreg(r_addr, I64);
-                r_arg2 = ctx.ensure_in_vreg(r_arg2, I64);
-                // Move the args to the preordained AtomicRMW input regs
-                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
-                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
-                ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op });
-                // And finally, copy the preordained AtomicRMW output reg to its destination.
-                ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
-                // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
-            }
-        }
+        Opcode::AtomicRmw => implemented_in_isle(ctx),
Opcode::AtomicCas => {
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();