Merge pull request #3322 from sparker-arm/aarch64-lse-ops
AArch64 LSE atomic_rmw support
This commit is contained in:
@@ -504,6 +504,33 @@ fn enc_dmb_ish() -> u32 {
|
||||
0xD5033BBF
|
||||
}
|
||||
|
||||
fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
assert!(machreg_to_gpr(rt.to_reg()) != 31);
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
I32 => 0b10,
|
||||
I16 => 0b01,
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let op = match op {
|
||||
AtomicRMWOp::Add => 0b000,
|
||||
AtomicRMWOp::Clr => 0b001,
|
||||
AtomicRMWOp::Eor => 0b010,
|
||||
AtomicRMWOp::Set => 0b011,
|
||||
AtomicRMWOp::Smax => 0b100,
|
||||
AtomicRMWOp::Smin => 0b101,
|
||||
AtomicRMWOp::Umax => 0b110,
|
||||
AtomicRMWOp::Umin => 0b111,
|
||||
};
|
||||
0b00_111_000_111_00000_0_000_00_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rs) << 16)
|
||||
| (op << 12)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rt.to_reg())
|
||||
}
|
||||
|
||||
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
@@ -1318,7 +1345,10 @@ impl MachInstEmit for Inst {
|
||||
} => {
|
||||
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
|
||||
}
|
||||
&Inst::AtomicRMW { ty, op } => {
|
||||
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
|
||||
sink.put4(enc_ldal(ty, op, rs, rt, rn));
|
||||
}
|
||||
&Inst::AtomicRMWLoop { ty, op } => {
|
||||
/* Emit this:
|
||||
again:
|
||||
ldaxr{,b,h} x/w27, [x25]
|
||||
@@ -1340,7 +1370,7 @@ impl MachInstEmit for Inst {
|
||||
so that we simply write in the destination, the "2nd arg for op".
|
||||
*/
|
||||
// TODO: We should not hardcode registers here, a better idea would be to
|
||||
// pass some scratch registers in the AtomicRMW pseudo-instruction, and use those
|
||||
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
|
||||
let xzr = zero_reg();
|
||||
let x24 = xreg(24);
|
||||
let x25 = xreg(25);
|
||||
|
||||
@@ -5986,7 +5986,7 @@ fn test_aarch64_binemit() {
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Xor,
|
||||
},
|
||||
@@ -5996,6 +5996,359 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Add,
|
||||
rs: xreg(1),
|
||||
rt: writable_xreg(2),
|
||||
rn: xreg(3),
|
||||
},
|
||||
"6200E138",
|
||||
"ldaddalb w1, w2, [x3]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Add,
|
||||
rs: xreg(4),
|
||||
rt: writable_xreg(5),
|
||||
rn: xreg(6),
|
||||
},
|
||||
"C500E478",
|
||||
"ldaddalh w4, w5, [x6]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Add,
|
||||
rs: xreg(7),
|
||||
rt: writable_xreg(8),
|
||||
rn: xreg(9),
|
||||
},
|
||||
"2801E7B8",
|
||||
"ldaddal w7, w8, [x9]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Add,
|
||||
rs: xreg(10),
|
||||
rt: writable_xreg(11),
|
||||
rn: xreg(12),
|
||||
},
|
||||
"8B01EAF8",
|
||||
"ldaddal x10, x11, [x12]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Clr,
|
||||
rs: xreg(13),
|
||||
rt: writable_xreg(14),
|
||||
rn: xreg(15),
|
||||
},
|
||||
"EE11ED38",
|
||||
"ldclralb w13, w14, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Clr,
|
||||
rs: xreg(16),
|
||||
rt: writable_xreg(17),
|
||||
rn: xreg(18),
|
||||
},
|
||||
"5112F078",
|
||||
"ldclralh w16, w17, [x18]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Clr,
|
||||
rs: xreg(19),
|
||||
rt: writable_xreg(20),
|
||||
rn: xreg(21),
|
||||
},
|
||||
"B412F3B8",
|
||||
"ldclral w19, w20, [x21]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Clr,
|
||||
rs: xreg(22),
|
||||
rt: writable_xreg(23),
|
||||
rn: xreg(24),
|
||||
},
|
||||
"1713F6F8",
|
||||
"ldclral x22, x23, [x24]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Eor,
|
||||
rs: xreg(25),
|
||||
rt: writable_xreg(26),
|
||||
rn: xreg(27),
|
||||
},
|
||||
"7A23F938",
|
||||
"ldeoralb w25, w26, [x27]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Eor,
|
||||
rs: xreg(28),
|
||||
rt: writable_xreg(29),
|
||||
rn: xreg(30),
|
||||
},
|
||||
"DD23FC78",
|
||||
"ldeoralh w28, fp, [lr]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Eor,
|
||||
rs: xreg(29),
|
||||
rt: writable_xreg(28),
|
||||
rn: xreg(27),
|
||||
},
|
||||
"7C23FDB8",
|
||||
"ldeoral fp, w28, [x27]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Eor,
|
||||
rs: xreg(26),
|
||||
rt: writable_xreg(25),
|
||||
rn: xreg(24),
|
||||
},
|
||||
"1923FAF8",
|
||||
"ldeoral x26, x25, [x24]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Set,
|
||||
rs: xreg(23),
|
||||
rt: writable_xreg(22),
|
||||
rn: xreg(21),
|
||||
},
|
||||
"B632F738",
|
||||
"ldsetalb w23, w22, [x21]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Set,
|
||||
rs: xreg(20),
|
||||
rt: writable_xreg(19),
|
||||
rn: xreg(18),
|
||||
},
|
||||
"5332F478",
|
||||
"ldsetalh w20, w19, [x18]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Set,
|
||||
rs: xreg(17),
|
||||
rt: writable_xreg(16),
|
||||
rn: xreg(15),
|
||||
},
|
||||
"F031F1B8",
|
||||
"ldsetal w17, w16, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Set,
|
||||
rs: xreg(14),
|
||||
rt: writable_xreg(13),
|
||||
rn: xreg(12),
|
||||
},
|
||||
"8D31EEF8",
|
||||
"ldsetal x14, x13, [x12]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Smax,
|
||||
rs: xreg(11),
|
||||
rt: writable_xreg(10),
|
||||
rn: xreg(9),
|
||||
},
|
||||
"2A41EB38",
|
||||
"ldsmaxalb w11, w10, [x9]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Smax,
|
||||
rs: xreg(8),
|
||||
rt: writable_xreg(7),
|
||||
rn: xreg(6),
|
||||
},
|
||||
"C740E878",
|
||||
"ldsmaxalh w8, w7, [x6]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Smax,
|
||||
rs: xreg(5),
|
||||
rt: writable_xreg(4),
|
||||
rn: xreg(3),
|
||||
},
|
||||
"6440E5B8",
|
||||
"ldsmaxal w5, w4, [x3]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Smax,
|
||||
rs: xreg(2),
|
||||
rt: writable_xreg(1),
|
||||
rn: xreg(0),
|
||||
},
|
||||
"0140E2F8",
|
||||
"ldsmaxal x2, x1, [x0]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Smin,
|
||||
rs: xreg(1),
|
||||
rt: writable_xreg(2),
|
||||
rn: xreg(3),
|
||||
},
|
||||
"6250E138",
|
||||
"ldsminalb w1, w2, [x3]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Smin,
|
||||
rs: xreg(4),
|
||||
rt: writable_xreg(5),
|
||||
rn: xreg(6),
|
||||
},
|
||||
"C550E478",
|
||||
"ldsminalh w4, w5, [x6]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Smin,
|
||||
rs: xreg(7),
|
||||
rt: writable_xreg(8),
|
||||
rn: xreg(9),
|
||||
},
|
||||
"2851E7B8",
|
||||
"ldsminal w7, w8, [x9]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Smin,
|
||||
rs: xreg(10),
|
||||
rt: writable_xreg(11),
|
||||
rn: xreg(12),
|
||||
},
|
||||
"8B51EAF8",
|
||||
"ldsminal x10, x11, [x12]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Umax,
|
||||
rs: xreg(13),
|
||||
rt: writable_xreg(14),
|
||||
rn: xreg(15),
|
||||
},
|
||||
"EE61ED38",
|
||||
"ldumaxalb w13, w14, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Umax,
|
||||
rs: xreg(16),
|
||||
rt: writable_xreg(17),
|
||||
rn: xreg(18),
|
||||
},
|
||||
"5162F078",
|
||||
"ldumaxalh w16, w17, [x18]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Umax,
|
||||
rs: xreg(19),
|
||||
rt: writable_xreg(20),
|
||||
rn: xreg(21),
|
||||
},
|
||||
"B462F3B8",
|
||||
"ldumaxal w19, w20, [x21]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Umax,
|
||||
rs: xreg(22),
|
||||
rt: writable_xreg(23),
|
||||
rn: xreg(24),
|
||||
},
|
||||
"1763F6F8",
|
||||
"ldumaxal x22, x23, [x24]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I8,
|
||||
op: AtomicRMWOp::Umin,
|
||||
rs: xreg(16),
|
||||
rt: writable_xreg(17),
|
||||
rn: xreg(18),
|
||||
},
|
||||
"5172F038",
|
||||
"lduminalb w16, w17, [x18]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I16,
|
||||
op: AtomicRMWOp::Umin,
|
||||
rs: xreg(19),
|
||||
rt: writable_xreg(20),
|
||||
rn: xreg(21),
|
||||
},
|
||||
"B472F378",
|
||||
"lduminalh w19, w20, [x21]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I32,
|
||||
op: AtomicRMWOp::Umin,
|
||||
rs: xreg(22),
|
||||
rt: writable_xreg(23),
|
||||
rn: xreg(24),
|
||||
},
|
||||
"1773F6B8",
|
||||
"lduminal w22, w23, [x24]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMW {
|
||||
ty: I64,
|
||||
op: AtomicRMWOp::Umin,
|
||||
rs: xreg(25),
|
||||
rt: writable_xreg(26),
|
||||
rn: xreg(27),
|
||||
},
|
||||
"7A73F9F8",
|
||||
"lduminal x25, x26, [x27]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
},
|
||||
|
||||
@@ -451,6 +451,19 @@ pub enum VecShiftImmOp {
|
||||
Sshr,
|
||||
}
|
||||
|
||||
/// The operations that can be performed by a single atomic read-modify-write
/// instruction. Every one of them is emitted with acquire-release semantics.
///
/// The variant order is significant: it fixes the discriminant values.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum AtomicRMWOp {
    /// Atomic add; printed as `ldaddal`.
    Add,
    /// Atomic bit clear; printed as `ldclral`.
    ///
    /// NOTE(review): per the Arm architecture manual, LDCLR stores
    /// `memory AND NOT operand`, which is not a plain AND. Confirm that any
    /// caller mapping an IR `and` onto this variant inverts the operand first.
    Clr,
    /// Atomic exclusive OR; printed as `ldeoral`.
    Eor,
    /// Atomic bit set; printed as `ldsetal`.
    Set,
    /// Atomic signed maximum; printed as `ldsmaxal`.
    Smax,
    /// Atomic signed minimum; printed as `ldsminal`.
    Smin,
    /// Atomic unsigned maximum; printed as `ldumaxal`.
    Umax,
    /// Atomic unsigned minimum; printed as `lduminal`.
    Umin,
}
|
||||
|
||||
/// An operation on the bits of a register. This can be paired with several instruction formats
|
||||
/// below (see `Inst`) in any combination.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
@@ -775,11 +788,22 @@ pub enum Inst {
|
||||
/// x27 (wr) old value
|
||||
/// x24 (wr) scratch reg; value afterwards has no meaning
|
||||
/// x28 (wr) scratch reg; value afterwards has no meaning
|
||||
AtomicRMW {
|
||||
AtomicRMWLoop {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
op: inst_common::AtomicRmwOp,
|
||||
},
|
||||
|
||||
/// An atomic read-modify-write operation. These instructions require the
|
||||
/// Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have
|
||||
/// acquire-release semantics.
|
||||
AtomicRMW {
|
||||
op: AtomicRMWOp,
|
||||
rs: Reg,
|
||||
rt: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
},
|
||||
|
||||
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
|
||||
AtomicCAS {
|
||||
rs: Writable<Reg>,
|
||||
@@ -788,10 +812,10 @@ pub enum Inst {
|
||||
ty: Type,
|
||||
},
|
||||
|
||||
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
|
||||
/// Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked
|
||||
/// store-conditional loop.
|
||||
/// This instruction is sequentially consistent.
|
||||
/// Note that the operand conventions, although very similar to AtomicRMW, are different:
|
||||
/// Note that the operand conventions, although very similar to AtomicRMWLoop, are different:
|
||||
///
|
||||
/// x25 (rd) address
|
||||
/// x26 (rd) expected value
|
||||
@@ -1920,13 +1944,18 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
&Inst::CCmpImm { rn, .. } => {
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::AtomicRMW { .. } => {
|
||||
&Inst::AtomicRMWLoop { .. } => {
|
||||
collector.add_use(xreg(25));
|
||||
collector.add_use(xreg(26));
|
||||
collector.add_def(writable_xreg(24));
|
||||
collector.add_def(writable_xreg(27));
|
||||
collector.add_def(writable_xreg(28));
|
||||
}
|
||||
&Inst::AtomicRMW { rs, rt, rn, .. } => {
|
||||
collector.add_use(rs);
|
||||
collector.add_def(rt);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::AtomicCAS { rs, rt, rn, .. } => {
|
||||
collector.add_mod(rs);
|
||||
collector.add_use(rt);
|
||||
@@ -2562,9 +2591,19 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
&mut Inst::CCmpImm { ref mut rn, .. } => {
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::AtomicRMW { .. } => {
|
||||
&mut Inst::AtomicRMWLoop { .. } => {
|
||||
// There are no vregs to map in this insn.
|
||||
}
|
||||
&mut Inst::AtomicRMW {
|
||||
ref mut rs,
|
||||
ref mut rt,
|
||||
ref mut rn,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, rs);
|
||||
map_def(mapper, rt);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::AtomicCAS {
|
||||
ref mut rs,
|
||||
ref mut rt,
|
||||
@@ -3618,7 +3657,31 @@ impl Inst {
|
||||
let cond = cond.show_rru(mb_rru);
|
||||
format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
|
||||
}
|
||||
&Inst::AtomicRMW { ty, op, .. } => {
|
||||
&Inst::AtomicRMW { rs, rt, rn, ty, op } => {
|
||||
let op = match op {
|
||||
AtomicRMWOp::Add => "ldaddal",
|
||||
AtomicRMWOp::Clr => "ldclral",
|
||||
AtomicRMWOp::Eor => "ldeoral",
|
||||
AtomicRMWOp::Set => "ldsetal",
|
||||
AtomicRMWOp::Smax => "ldsmaxal",
|
||||
AtomicRMWOp::Umax => "ldumaxal",
|
||||
AtomicRMWOp::Smin => "ldsminal",
|
||||
AtomicRMWOp::Umin => "lduminal",
|
||||
};
|
||||
|
||||
let size = OperandSize::from_ty(ty);
|
||||
let rs = show_ireg_sized(rs, mb_rru, size);
|
||||
let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
|
||||
let ty_suffix = match ty {
|
||||
I8 => "b",
|
||||
I16 => "h",
|
||||
_ => "",
|
||||
};
|
||||
format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn)
|
||||
}
|
||||
&Inst::AtomicRMWLoop { ty, op, .. } => {
|
||||
format!(
|
||||
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
|
||||
ty.bits(), op)
|
||||
|
||||
@@ -1529,20 +1529,41 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
// Make sure that both args are in virtual regs, since in effect
|
||||
// we have to do a parallel copy to get them safely to the AtomicRMW input
|
||||
// regs, and that's not guaranteed safe if either is in a real reg.
|
||||
r_addr = ctx.ensure_in_vreg(r_addr, I64);
|
||||
r_arg2 = ctx.ensure_in_vreg(r_arg2, I64);
|
||||
// Move the args to the preordained AtomicRMW input regs
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
||||
// Now the AtomicRMW insn itself
|
||||
|
||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||
ctx.emit(Inst::AtomicRMW { ty: ty_access, op });
|
||||
// And finally, copy the preordained AtomicRMW output reg to its destination.
|
||||
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
||||
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
||||
let lse_op = match op {
|
||||
AtomicRmwOp::Add => Some(AtomicRMWOp::Add),
|
||||
AtomicRmwOp::And => Some(AtomicRMWOp::Clr),
|
||||
AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor),
|
||||
AtomicRmwOp::Or => Some(AtomicRMWOp::Set),
|
||||
AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax),
|
||||
AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax),
|
||||
AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin),
|
||||
AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin),
|
||||
_ => None,
|
||||
};
|
||||
if isa_flags.use_lse() && lse_op.is_some() {
|
||||
ctx.emit(Inst::AtomicRMW {
|
||||
op: lse_op.unwrap(),
|
||||
rs: r_arg2,
|
||||
rt: r_dst,
|
||||
rn: r_addr,
|
||||
ty: ty_access,
|
||||
});
|
||||
} else {
|
||||
// Make sure that both args are in virtual regs, since in effect
|
||||
// we have to do a parallel copy to get them safely to the AtomicRMWLoop input
|
||||
// regs, and that's not guaranteed safe if either is in a real reg.
|
||||
r_addr = ctx.ensure_in_vreg(r_addr, I64);
|
||||
r_arg2 = ctx.ensure_in_vreg(r_arg2, I64);
|
||||
// Move the args to the preordained AtomicRMWLoop input regs
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
|
||||
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
|
||||
ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op });
|
||||
// And finally, copy the preordained AtomicRMWLoop output reg to its destination.
|
||||
ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
|
||||
// Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::AtomicCas => {
|
||||
|
||||
Reference in New Issue
Block a user