AArch64 LSE atomic_rmw support
Rename the existing AtomicRMW to AtomicRMWLoop and lower atomic_rmw operations directly, without a loop, when LSE support is available.
Copyright (c) 2021, Arm Limited
This commit is contained in:
@@ -504,6 +504,33 @@ fn enc_dmb_ish() -> u32 {
|
|||||||
0xD5033BBF
|
0xD5033BBF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
|
assert!(machreg_to_gpr(rt.to_reg()) != 31);
|
||||||
|
let sz = match ty {
|
||||||
|
I64 => 0b11,
|
||||||
|
I32 => 0b10,
|
||||||
|
I16 => 0b01,
|
||||||
|
I8 => 0b00,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let op = match op {
|
||||||
|
AtomicRMWOp::Add => 0b000,
|
||||||
|
AtomicRMWOp::Clr => 0b001,
|
||||||
|
AtomicRMWOp::Eor => 0b010,
|
||||||
|
AtomicRMWOp::Set => 0b011,
|
||||||
|
AtomicRMWOp::Smax => 0b100,
|
||||||
|
AtomicRMWOp::Smin => 0b101,
|
||||||
|
AtomicRMWOp::Umax => 0b110,
|
||||||
|
AtomicRMWOp::Umin => 0b111,
|
||||||
|
};
|
||||||
|
0b00_111_000_111_00000_0_000_00_00000_00000
|
||||||
|
| (sz << 30)
|
||||||
|
| (machreg_to_gpr(rs) << 16)
|
||||||
|
| (op << 12)
|
||||||
|
| (machreg_to_gpr(rn) << 5)
|
||||||
|
| machreg_to_gpr(rt.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
let sz = match ty {
|
let sz = match ty {
|
||||||
I64 => 0b11,
|
I64 => 0b11,
|
||||||
@@ -1318,7 +1345,10 @@ impl MachInstEmit for Inst {
|
|||||||
} => {
|
} => {
|
||||||
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
|
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
|
||||||
}
|
}
|
||||||
&Inst::AtomicRMW { ty, op } => {
|
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
|
||||||
|
sink.put4(enc_ldal(ty, op, rs, rt, rn));
|
||||||
|
}
|
||||||
|
&Inst::AtomicRMWLoop { ty, op } => {
|
||||||
/* Emit this:
|
/* Emit this:
|
||||||
again:
|
again:
|
||||||
ldaxr{,b,h} x/w27, [x25]
|
ldaxr{,b,h} x/w27, [x25]
|
||||||
@@ -1340,7 +1370,7 @@ impl MachInstEmit for Inst {
|
|||||||
so that we simply write in the destination, the "2nd arg for op".
|
so that we simply write in the destination, the "2nd arg for op".
|
||||||
*/
|
*/
|
||||||
// TODO: We should not hardcode registers here, a better idea would be to
|
// TODO: We should not hardcode registers here, a better idea would be to
|
||||||
// pass some scratch registers in the AtomicRMW pseudo-instruction, and use those
|
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
|
||||||
let xzr = zero_reg();
|
let xzr = zero_reg();
|
||||||
let x24 = xreg(24);
|
let x24 = xreg(24);
|
||||||
let x25 = xreg(25);
|
let x25 = xreg(25);
|
||||||
|
|||||||
@@ -5887,7 +5887,7 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicRMW {
|
Inst::AtomicRMWLoop {
|
||||||
ty: I16,
|
ty: I16,
|
||||||
op: inst_common::AtomicRmwOp::Xor,
|
op: inst_common::AtomicRmwOp::Xor,
|
||||||
},
|
},
|
||||||
@@ -5897,6 +5897,359 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicRMW {
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Add,
|
||||||
|
rs: xreg(1),
|
||||||
|
rt: writable_xreg(2),
|
||||||
|
rn: xreg(3),
|
||||||
|
},
|
||||||
|
"6200E138",
|
||||||
|
"ldaddalb w1, w2, [x3]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Add,
|
||||||
|
rs: xreg(4),
|
||||||
|
rt: writable_xreg(5),
|
||||||
|
rn: xreg(6),
|
||||||
|
},
|
||||||
|
"C500E478",
|
||||||
|
"ldaddalh w4, w5, [x6]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Add,
|
||||||
|
rs: xreg(7),
|
||||||
|
rt: writable_xreg(8),
|
||||||
|
rn: xreg(9),
|
||||||
|
},
|
||||||
|
"2801E7B8",
|
||||||
|
"ldaddal w7, w8, [x9]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Add,
|
||||||
|
rs: xreg(10),
|
||||||
|
rt: writable_xreg(11),
|
||||||
|
rn: xreg(12),
|
||||||
|
},
|
||||||
|
"8B01EAF8",
|
||||||
|
"ldaddal x10, x11, [x12]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Clr,
|
||||||
|
rs: xreg(13),
|
||||||
|
rt: writable_xreg(14),
|
||||||
|
rn: xreg(15),
|
||||||
|
},
|
||||||
|
"EE11ED38",
|
||||||
|
"ldclralb w13, w14, [x15]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Clr,
|
||||||
|
rs: xreg(16),
|
||||||
|
rt: writable_xreg(17),
|
||||||
|
rn: xreg(18),
|
||||||
|
},
|
||||||
|
"5112F078",
|
||||||
|
"ldclralh w16, w17, [x18]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Clr,
|
||||||
|
rs: xreg(19),
|
||||||
|
rt: writable_xreg(20),
|
||||||
|
rn: xreg(21),
|
||||||
|
},
|
||||||
|
"B412F3B8",
|
||||||
|
"ldclral w19, w20, [x21]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Clr,
|
||||||
|
rs: xreg(22),
|
||||||
|
rt: writable_xreg(23),
|
||||||
|
rn: xreg(24),
|
||||||
|
},
|
||||||
|
"1713F6F8",
|
||||||
|
"ldclral x22, x23, [x24]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Eor,
|
||||||
|
rs: xreg(25),
|
||||||
|
rt: writable_xreg(26),
|
||||||
|
rn: xreg(27),
|
||||||
|
},
|
||||||
|
"7A23F938",
|
||||||
|
"ldeoralb w25, w26, [x27]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Eor,
|
||||||
|
rs: xreg(28),
|
||||||
|
rt: writable_xreg(29),
|
||||||
|
rn: xreg(30),
|
||||||
|
},
|
||||||
|
"DD23FC78",
|
||||||
|
"ldeoralh w28, fp, [lr]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Eor,
|
||||||
|
rs: xreg(29),
|
||||||
|
rt: writable_xreg(28),
|
||||||
|
rn: xreg(27),
|
||||||
|
},
|
||||||
|
"7C23FDB8",
|
||||||
|
"ldeoral fp, w28, [x27]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Eor,
|
||||||
|
rs: xreg(26),
|
||||||
|
rt: writable_xreg(25),
|
||||||
|
rn: xreg(24),
|
||||||
|
},
|
||||||
|
"1923FAF8",
|
||||||
|
"ldeoral x26, x25, [x24]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Set,
|
||||||
|
rs: xreg(23),
|
||||||
|
rt: writable_xreg(22),
|
||||||
|
rn: xreg(21),
|
||||||
|
},
|
||||||
|
"B632F738",
|
||||||
|
"ldsetalb w23, w22, [x21]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Set,
|
||||||
|
rs: xreg(20),
|
||||||
|
rt: writable_xreg(19),
|
||||||
|
rn: xreg(18),
|
||||||
|
},
|
||||||
|
"5332F478",
|
||||||
|
"ldsetalh w20, w19, [x18]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Set,
|
||||||
|
rs: xreg(17),
|
||||||
|
rt: writable_xreg(16),
|
||||||
|
rn: xreg(15),
|
||||||
|
},
|
||||||
|
"F031F1B8",
|
||||||
|
"ldsetal w17, w16, [x15]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Set,
|
||||||
|
rs: xreg(14),
|
||||||
|
rt: writable_xreg(13),
|
||||||
|
rn: xreg(12),
|
||||||
|
},
|
||||||
|
"8D31EEF8",
|
||||||
|
"ldsetal x14, x13, [x12]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Smax,
|
||||||
|
rs: xreg(11),
|
||||||
|
rt: writable_xreg(10),
|
||||||
|
rn: xreg(9),
|
||||||
|
},
|
||||||
|
"2A41EB38",
|
||||||
|
"ldsmaxalb w11, w10, [x9]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Smax,
|
||||||
|
rs: xreg(8),
|
||||||
|
rt: writable_xreg(7),
|
||||||
|
rn: xreg(6),
|
||||||
|
},
|
||||||
|
"C740E878",
|
||||||
|
"ldsmaxalh w8, w7, [x6]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Smax,
|
||||||
|
rs: xreg(5),
|
||||||
|
rt: writable_xreg(4),
|
||||||
|
rn: xreg(3),
|
||||||
|
},
|
||||||
|
"6440E5B8",
|
||||||
|
"ldsmaxal w5, w4, [x3]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Smax,
|
||||||
|
rs: xreg(2),
|
||||||
|
rt: writable_xreg(1),
|
||||||
|
rn: xreg(0),
|
||||||
|
},
|
||||||
|
"0140E2F8",
|
||||||
|
"ldsmaxal x2, x1, [x0]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Smin,
|
||||||
|
rs: xreg(1),
|
||||||
|
rt: writable_xreg(2),
|
||||||
|
rn: xreg(3),
|
||||||
|
},
|
||||||
|
"6250E138",
|
||||||
|
"ldsminalb w1, w2, [x3]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Smin,
|
||||||
|
rs: xreg(4),
|
||||||
|
rt: writable_xreg(5),
|
||||||
|
rn: xreg(6),
|
||||||
|
},
|
||||||
|
"C550E478",
|
||||||
|
"ldsminalh w4, w5, [x6]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Smin,
|
||||||
|
rs: xreg(7),
|
||||||
|
rt: writable_xreg(8),
|
||||||
|
rn: xreg(9),
|
||||||
|
},
|
||||||
|
"2851E7B8",
|
||||||
|
"ldsminal w7, w8, [x9]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Smin,
|
||||||
|
rs: xreg(10),
|
||||||
|
rt: writable_xreg(11),
|
||||||
|
rn: xreg(12),
|
||||||
|
},
|
||||||
|
"8B51EAF8",
|
||||||
|
"ldsminal x10, x11, [x12]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Umax,
|
||||||
|
rs: xreg(13),
|
||||||
|
rt: writable_xreg(14),
|
||||||
|
rn: xreg(15),
|
||||||
|
},
|
||||||
|
"EE61ED38",
|
||||||
|
"ldumaxalb w13, w14, [x15]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Umax,
|
||||||
|
rs: xreg(16),
|
||||||
|
rt: writable_xreg(17),
|
||||||
|
rn: xreg(18),
|
||||||
|
},
|
||||||
|
"5162F078",
|
||||||
|
"ldumaxalh w16, w17, [x18]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Umax,
|
||||||
|
rs: xreg(19),
|
||||||
|
rt: writable_xreg(20),
|
||||||
|
rn: xreg(21),
|
||||||
|
},
|
||||||
|
"B462F3B8",
|
||||||
|
"ldumaxal w19, w20, [x21]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Umax,
|
||||||
|
rs: xreg(22),
|
||||||
|
rt: writable_xreg(23),
|
||||||
|
rn: xreg(24),
|
||||||
|
},
|
||||||
|
"1763F6F8",
|
||||||
|
"ldumaxal x22, x23, [x24]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I8,
|
||||||
|
op: AtomicRMWOp::Umin,
|
||||||
|
rs: xreg(16),
|
||||||
|
rt: writable_xreg(17),
|
||||||
|
rn: xreg(18),
|
||||||
|
},
|
||||||
|
"5172F038",
|
||||||
|
"lduminalb w16, w17, [x18]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I16,
|
||||||
|
op: AtomicRMWOp::Umin,
|
||||||
|
rs: xreg(19),
|
||||||
|
rt: writable_xreg(20),
|
||||||
|
rn: xreg(21),
|
||||||
|
},
|
||||||
|
"B472F378",
|
||||||
|
"lduminalh w19, w20, [x21]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I32,
|
||||||
|
op: AtomicRMWOp::Umin,
|
||||||
|
rs: xreg(22),
|
||||||
|
rt: writable_xreg(23),
|
||||||
|
rn: xreg(24),
|
||||||
|
},
|
||||||
|
"1773F6B8",
|
||||||
|
"lduminal w22, w23, [x24]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMW {
|
||||||
|
ty: I64,
|
||||||
|
op: AtomicRMWOp::Umin,
|
||||||
|
rs: xreg(25),
|
||||||
|
rt: writable_xreg(26),
|
||||||
|
rn: xreg(27),
|
||||||
|
},
|
||||||
|
"7A73F9F8",
|
||||||
|
"lduminal x25, x26, [x27]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicRMWLoop {
|
||||||
ty: I32,
|
ty: I32,
|
||||||
op: inst_common::AtomicRmwOp::Xchg,
|
op: inst_common::AtomicRmwOp::Xchg,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -451,6 +451,19 @@ pub enum VecShiftImmOp {
|
|||||||
Sshr,
|
Sshr,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Atomic read-modify-write operations with acquire-release semantics
///
/// Each variant selects one single-instruction atomic operation; the
/// mnemonics given below are the ones the instruction printer emits
/// (with a `b`/`h` suffix appended for 8- and 16-bit accesses).
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum AtomicRMWOp {
    /// Atomic add (`ldaddal`).
    Add,
    /// Atomic bit clear (`ldclral`).
    ///
    /// Per the Arm ISA this stores `memory AND NOT operand`; it is not a
    /// plain bitwise AND, so an AND must be expressed by passing the
    /// complement of the mask.
    Clr,
    /// Atomic exclusive OR (`ldeoral`).
    Eor,
    /// Atomic bit set, i.e. bitwise OR (`ldsetal`).
    Set,
    /// Atomic signed maximum (`ldsmaxal`).
    Smax,
    /// Atomic signed minimum (`ldsminal`).
    Smin,
    /// Atomic unsigned maximum (`ldumaxal`).
    Umax,
    /// Atomic unsigned minimum (`lduminal`).
    Umin,
}
|
||||||
|
|
||||||
/// An operation on the bits of a register. This can be paired with several instruction formats
|
/// An operation on the bits of a register. This can be paired with several instruction formats
|
||||||
/// below (see `Inst`) in any combination.
|
/// below (see `Inst`) in any combination.
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
@@ -775,11 +788,22 @@ pub enum Inst {
|
|||||||
/// x27 (wr) old value
|
/// x27 (wr) old value
|
||||||
/// x24 (wr) scratch reg; value afterwards has no meaning
|
/// x24 (wr) scratch reg; value afterwards has no meaning
|
||||||
/// x28 (wr) scratch reg; value afterwards has no meaning
|
/// x28 (wr) scratch reg; value afterwards has no meaning
|
||||||
AtomicRMW {
|
AtomicRMWLoop {
|
||||||
ty: Type, // I8, I16, I32 or I64
|
ty: Type, // I8, I16, I32 or I64
|
||||||
op: inst_common::AtomicRmwOp,
|
op: inst_common::AtomicRmwOp,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// An atomic read-modify-write operation. These instructions require the
|
||||||
|
/// Large System Extension (LSE) ISA support. The instructions have acquire-release
|
||||||
|
/// semantics.
|
||||||
|
AtomicRMW {
|
||||||
|
op: AtomicRMWOp,
|
||||||
|
rs: Reg,
|
||||||
|
rt: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
ty: Type,
|
||||||
|
},
|
||||||
|
|
||||||
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
||||||
AtomicCAS {
|
AtomicCAS {
|
||||||
rs: Writable<Reg>,
|
rs: Writable<Reg>,
|
||||||
@@ -788,10 +812,10 @@ pub enum Inst {
|
|||||||
ty: Type,
|
ty: Type,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
|
/// Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
|
||||||
/// store-conditional loop.
|
/// store-conditional loop.
|
||||||
/// This instruction is sequentially consistent.
|
/// This instruction is sequentially consistent.
|
||||||
/// Note that the operand conventions, although very similar to AtomicRMW, are different:
|
/// Note that the operand conventions, although very similar to AtomicRMWLoop, are different:
|
||||||
///
|
///
|
||||||
/// x25 (rd) address
|
/// x25 (rd) address
|
||||||
/// x26 (rd) expected value
|
/// x26 (rd) expected value
|
||||||
@@ -1919,13 +1943,18 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
&Inst::CCmpImm { rn, .. } => {
|
&Inst::CCmpImm { rn, .. } => {
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
&Inst::AtomicRMW { .. } => {
|
&Inst::AtomicRMWLoop { .. } => {
|
||||||
collector.add_use(xreg(25));
|
collector.add_use(xreg(25));
|
||||||
collector.add_use(xreg(26));
|
collector.add_use(xreg(26));
|
||||||
collector.add_def(writable_xreg(24));
|
collector.add_def(writable_xreg(24));
|
||||||
collector.add_def(writable_xreg(27));
|
collector.add_def(writable_xreg(27));
|
||||||
collector.add_def(writable_xreg(28));
|
collector.add_def(writable_xreg(28));
|
||||||
}
|
}
|
||||||
|
&Inst::AtomicRMW { rs, rt, rn, .. } => {
|
||||||
|
collector.add_use(rs);
|
||||||
|
collector.add_def(rt);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::AtomicCAS { rs, rt, rn, .. } => {
|
&Inst::AtomicCAS { rs, rt, rn, .. } => {
|
||||||
collector.add_mod(rs);
|
collector.add_mod(rs);
|
||||||
collector.add_use(rt);
|
collector.add_use(rt);
|
||||||
@@ -2561,9 +2590,19 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
&mut Inst::CCmpImm { ref mut rn, .. } => {
|
&mut Inst::CCmpImm { ref mut rn, .. } => {
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
&mut Inst::AtomicRMW { .. } => {
|
&mut Inst::AtomicRMWLoop { .. } => {
|
||||||
// There are no vregs to map in this insn.
|
// There are no vregs to map in this insn.
|
||||||
}
|
}
|
||||||
|
&mut Inst::AtomicRMW {
|
||||||
|
ref mut rs,
|
||||||
|
ref mut rt,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_use(mapper, rs);
|
||||||
|
map_def(mapper, rt);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::AtomicCAS {
|
&mut Inst::AtomicCAS {
|
||||||
ref mut rs,
|
ref mut rs,
|
||||||
ref mut rt,
|
ref mut rt,
|
||||||
@@ -3617,7 +3656,33 @@ impl Inst {
|
|||||||
let cond = cond.show_rru(mb_rru);
|
let cond = cond.show_rru(mb_rru);
|
||||||
format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
|
format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
|
||||||
}
|
}
|
||||||
&Inst::AtomicRMW { ty, op, .. } => {
|
&Inst::AtomicRMW {
|
||||||
|
rs, rt, rn, ty, op
|
||||||
|
} => {
|
||||||
|
let op = match op {
|
||||||
|
AtomicRMWOp::Add => "ldaddal",
|
||||||
|
AtomicRMWOp::Clr => "ldclral",
|
||||||
|
AtomicRMWOp::Eor => "ldeoral",
|
||||||
|
AtomicRMWOp::Set => "ldsetal",
|
||||||
|
AtomicRMWOp::Smax => "ldsmaxal",
|
||||||
|
AtomicRMWOp::Umax => "ldumaxal",
|
||||||
|
AtomicRMWOp::Smin => "ldsminal",
|
||||||
|
AtomicRMWOp::Umin => "lduminal",
|
||||||
|
};
|
||||||
|
|
||||||
|
let size = OperandSize::from_ty(ty);
|
||||||
|
let rs = show_ireg_sized(rs, mb_rru, size);
|
||||||
|
let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
|
||||||
|
let rn = rn.show_rru(mb_rru);
|
||||||
|
|
||||||
|
let ty_suffix = match ty {
|
||||||
|
I8 => "b",
|
||||||
|
I16 => "h",
|
||||||
|
_ => "",
|
||||||
|
};
|
||||||
|
format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn)
|
||||||
|
}
|
||||||
|
&Inst::AtomicRMWLoop { ty, op, .. } => {
|
||||||
format!(
|
format!(
|
||||||
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
|
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
|
||||||
ty.bits(), op)
|
ty.bits(), op)
|
||||||
|
|||||||
@@ -1529,6 +1529,28 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
let ty_access = ty.unwrap();
|
let ty_access = ty.unwrap();
|
||||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||||
|
|
||||||
|
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||||
|
let lse_op = match op {
|
||||||
|
AtomicRmwOp::Add => Some(AtomicRMWOp::Add),
|
||||||
|
AtomicRmwOp::And => Some(AtomicRMWOp::Clr),
|
||||||
|
AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor),
|
||||||
|
AtomicRmwOp::Or => Some(AtomicRMWOp::Set),
|
||||||
|
AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax),
|
||||||
|
AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax),
|
||||||
|
AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin),
|
||||||
|
AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin),
|
||||||
|
_ => None
|
||||||
|
};
|
||||||
|
if isa_flags.use_lse() && lse_op.is_some() {
|
||||||
|
ctx.emit(Inst::AtomicRMW {
|
||||||
|
op: lse_op.unwrap(),
|
||||||
|
rs: r_arg2,
|
||||||
|
rt: r_dst,
|
||||||
|
rn: r_addr,
|
||||||
|
ty: ty_access,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
// Make sure that both args are in virtual regs, since in effect
|
// Make sure that both args are in virtual regs, since in effect
|
||||||
// we have to do a parallel copy to get them safely to the AtomicRMW input
|
// we have to do a parallel copy to get them safely to the AtomicRMW input
|
||||||
// regs, and that's not guaranteed safe if either is in a real reg.
|
// regs, and that's not guaranteed safe if either is in a real reg.
|
||||||
@@ -1537,13 +1559,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
// Move the args to the preordained AtomicRMW input regs
|
// Move the args to the preordained AtomicRMW input regs
|
||||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
||||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
||||||
// Now the AtomicRMW insn itself
|
ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op });
|
||||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
|
||||||
ctx.emit(Inst::AtomicRMW { ty: ty_access, op });
|
|
||||||
// And finally, copy the preordained AtomicRMW output reg to its destination.
|
// And finally, copy the preordained AtomicRMW output reg to its destination.
|
||||||
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
||||||
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::AtomicCas => {
|
Opcode::AtomicCas => {
|
||||||
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|||||||
114
cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif
Normal file
114
cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
test compile
|
||||||
|
target aarch64 has_lse
|
||||||
|
|
||||||
|
function %atomic_rmw_add_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 add v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldaddal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_add_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 add v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldaddal w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_and_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 and v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldclral x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_and_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 and v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldclral w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_or_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 or v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsetal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_or_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 or v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsetal w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_xor_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 xor v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldeoral x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_xor_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 xor v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldeoral w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_smax_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 smax v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsmaxal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_smax_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 smax v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsmaxal w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_umax_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 umax v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldumaxal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_umax_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 umax v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldumaxal w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_smin_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 smin v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsminal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_smin_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 smin v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: ldsminal w1, w0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_umin_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = atomic_rmw.i64 umin v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: lduminal x1, x0, [x0]
|
||||||
|
|
||||||
|
function %atomic_rmw_umin_i32(i32, i32) {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = atomic_rmw.i32 umin v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
; check: lduminal w1, w0, [x0]
|
||||||
Reference in New Issue
Block a user