Merge pull request #3128 from sparker-arm/aarch64-atomics
Re-implement AArch64 atomic load and stores
This commit is contained in:
@@ -498,7 +498,7 @@ fn enc_dmb_ish() -> u32 {
|
||||
0xD5033BBF
|
||||
}
|
||||
|
||||
fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
I32 => 0b10,
|
||||
@@ -506,13 +506,13 @@ fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
0b00001000_01011111_01111100_00000000
|
||||
0b00_001000_1_1_0_11111_1_11111_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rt.to_reg())
|
||||
}
|
||||
|
||||
fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
||||
fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
I32 => 0b10,
|
||||
@@ -520,7 +520,35 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
0b00001000_00000000_01111100_00000000
|
||||
0b00_001000_100_11111_1_11111_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rt)
|
||||
}
|
||||
|
||||
/// Build the 32-bit encoding of `ldaxr{,b,h} rt, [rn]`, the load used at the
/// top of the atomic read-modify-write and compare-and-swap loops.
///
/// `ty` selects the access size (`I8`, `I16`, `I32` or `I64`) and is placed in
/// the two most significant bits of the word; `rn` (the address register) is
/// placed starting at bit 5 and `rt` (the destination register) starting at
/// bit 0.
///
/// Panics via `unreachable!()` if `ty` is any other type.
fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
I32 => 0b10,
|
||||
I16 => 0b01,
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
0b00_001000_0_1_0_11111_1_11111_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
| machreg_to_gpr(rt.to_reg())
|
||||
}
|
||||
|
||||
fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
||||
let sz = match ty {
|
||||
I64 => 0b11,
|
||||
I32 => 0b10,
|
||||
I16 => 0b01,
|
||||
I8 => 0b00,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
0b00_001000_000_00000_1_11111_00000_00000
|
||||
| (sz << 30)
|
||||
| (machreg_to_gpr(rs.to_reg()) << 16)
|
||||
| (machreg_to_gpr(rn) << 5)
|
||||
@@ -1286,20 +1314,18 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
&Inst::AtomicRMW { ty, op } => {
|
||||
/* Emit this:
|
||||
dmb ish
|
||||
again:
|
||||
ldxr{,b,h} x/w27, [x25]
|
||||
ldaxr{,b,h} x/w27, [x25]
|
||||
op x28, x27, x26 // op is add,sub,and,orr,eor
|
||||
stxr{,b,h} w24, x/w28, [x25]
|
||||
stlxr{,b,h} w24, x/w28, [x25]
|
||||
cbnz x24, again
|
||||
dmb ish
|
||||
|
||||
Operand conventions:
|
||||
IN: x25 (addr), x26 (2nd arg for op)
|
||||
OUT: x27 (old value), x24 (trashed), x28 (trashed)
|
||||
|
||||
It is unfortunate that, per the ARM documentation, x28 cannot be used for
|
||||
both the store-data and success-flag operands of stxr. This causes the
|
||||
both the store-data and success-flag operands of stlxr. This causes the
|
||||
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
|
||||
instead for the success-flag.
|
||||
|
||||
@@ -1320,15 +1346,13 @@ impl MachInstEmit for Inst {
|
||||
let x28wr = writable_xreg(28);
|
||||
let again_label = sink.get_label();
|
||||
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
|
||||
// again:
|
||||
sink.bind_label(again_label);
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
|
||||
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
|
||||
|
||||
match op {
|
||||
AtomicRmwOp::Xchg => {
|
||||
@@ -1420,19 +1444,17 @@ impl MachInstEmit for Inst {
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
|
||||
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||
|
||||
// cbnz w24, again
|
||||
// Note, we're actually testing x24, and relying on the default zero-high-half
|
||||
// rule in the assignment that `stxr` does.
|
||||
// rule in the assignment that `stlxr` does.
|
||||
let br_offset = sink.cur_offset();
|
||||
sink.put4(enc_conditional_br(
|
||||
BranchTarget::Label(again_label),
|
||||
CondBrKind::NotZero(x24),
|
||||
));
|
||||
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
|
||||
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
}
|
||||
&Inst::AtomicCAS { rs, rt, rn, ty } => {
|
||||
let size = match ty {
|
||||
@@ -1447,22 +1469,18 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
&Inst::AtomicCASLoop { ty } => {
|
||||
/* Emit this:
|
||||
dmb ish
|
||||
again:
|
||||
ldxr{,b,h} x/w27, [x25]
|
||||
and x24, x26, MASK (= 2^size_bits - 1)
|
||||
cmp x27, x24
|
||||
ldaxr{,b,h} x/w27, [x25]
|
||||
cmp x27, x/w26 uxt{b,h}
|
||||
b.ne out
|
||||
stxr{,b,h} w24, x/w28, [x25]
|
||||
stlxr{,b,h} w24, x/w28, [x25]
|
||||
cbnz x24, again
|
||||
out:
|
||||
dmb ish
|
||||
|
||||
Operand conventions:
|
||||
IN: x25 (addr), x26 (expected value), x28 (replacement value)
|
||||
OUT: x27 (old value), x24 (trashed)
|
||||
*/
|
||||
let xzr = zero_reg();
|
||||
let x24 = xreg(24);
|
||||
let x25 = xreg(25);
|
||||
let x26 = xreg(26);
|
||||
@@ -1474,37 +1492,25 @@ impl MachInstEmit for Inst {
|
||||
let again_label = sink.get_label();
|
||||
let out_label = sink.get_label();
|
||||
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
|
||||
// again:
|
||||
sink.bind_label(again_label);
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
|
||||
// ldaxr x27, [x25]
|
||||
sink.put4(enc_ldaxr(ty, x27wr, x25));
|
||||
|
||||
if ty == I64 {
|
||||
// mov x24, x26
|
||||
sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
|
||||
} else {
|
||||
// and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
|
||||
let (mask, s) = match ty {
|
||||
I8 => (0xFF, 7),
|
||||
I16 => (0xFFFF, 15),
|
||||
I32 => (0xFFFFFFFF, 31),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
sink.put4(enc_arith_rr_imml(
|
||||
0b100_100100,
|
||||
ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
|
||||
x26,
|
||||
x24wr,
|
||||
))
|
||||
}
|
||||
|
||||
// cmp x27, x24 (== subs xzr, x27, x24)
|
||||
sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
|
||||
// The top 32-bits are zero-extended by the ldaxr so we don't
|
||||
// have to use UXTW, just the x-form of the register.
|
||||
let (bit21, extend_op) = match ty {
|
||||
I8 => (0b1, 0b000000),
|
||||
I16 => (0b1, 0b001000),
|
||||
_ => (0b0, 0b000000),
|
||||
};
|
||||
let bits_31_21 = 0b111_01011_000 | bit21;
|
||||
// cmp x27, x26 (== subs xzr, x27, x26)
|
||||
sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
|
||||
|
||||
// b.ne out
|
||||
let br_out_offset = sink.cur_offset();
|
||||
@@ -1518,11 +1524,11 @@ impl MachInstEmit for Inst {
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
|
||||
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||
|
||||
// cbnz w24, again.
|
||||
// Note, we're actually testing x24, and relying on the default zero-high-half
|
||||
// rule in the assignment that `stxr` does.
|
||||
// rule in the assignment that `stlxr` does.
|
||||
let br_again_offset = sink.cur_offset();
|
||||
sink.put4(enc_conditional_br(
|
||||
BranchTarget::Label(again_label),
|
||||
@@ -1532,46 +1538,12 @@ impl MachInstEmit for Inst {
|
||||
|
||||
// out:
|
||||
sink.bind_label(out_label);
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
}
|
||||
&Inst::AtomicLoad { ty, r_data, r_addr } => {
|
||||
let op = match ty {
|
||||
I8 => 0b0011100001,
|
||||
I16 => 0b0111100001,
|
||||
I32 => 0b1011100001,
|
||||
I64 => 0b1111100001,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
|
||||
sink.put4(enc_ldst_uimm12(
|
||||
op,
|
||||
uimm12scaled_zero,
|
||||
r_addr,
|
||||
r_data.to_reg(),
|
||||
));
|
||||
&Inst::LoadAcquire { access_ty, rt, rn } => {
|
||||
sink.put4(enc_ldar(access_ty, rt, rn));
|
||||
}
|
||||
&Inst::AtomicStore { ty, r_data, r_addr } => {
|
||||
let op = match ty {
|
||||
I8 => 0b0011100000,
|
||||
I16 => 0b0111100000,
|
||||
I32 => 0b1011100000,
|
||||
I64 => 0b1111100000,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
&Inst::StoreRelease { access_ty, rt, rn } => {
|
||||
sink.put4(enc_stlr(access_ty, rt, rn));
|
||||
}
|
||||
&Inst::Fence {} => {
|
||||
sink.put4(enc_dmb_ish()); // dmb ish
|
||||
|
||||
@@ -5891,7 +5891,7 @@ fn test_aarch64_binemit() {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Xor,
|
||||
},
|
||||
"BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
|
||||
"3BFF5F487C031ACA3CFF1848B8FFFFB5",
|
||||
"atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||
));
|
||||
|
||||
@@ -5900,7 +5900,7 @@ fn test_aarch64_binemit() {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
},
|
||||
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
|
||||
"3BFF5F88FC031AAA3CFF1888B8FFFFB5",
|
||||
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||
));
|
||||
insns.push((
|
||||
@@ -5947,56 +5947,112 @@ fn test_aarch64_binemit() {
|
||||
Inst::AtomicCASLoop {
|
||||
ty: I8,
|
||||
},
|
||||
"BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
|
||||
"3BFF5F087F033AEB610000543CFF180898FFFFB5",
|
||||
"atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicCASLoop {
|
||||
ty: I16,
|
||||
},
|
||||
"3BFF5F487F233AEB610000543CFF184898FFFFB5",
|
||||
"atomically { compare-and-swap(16_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicCASLoop {
|
||||
ty: I32,
|
||||
},
|
||||
"3BFF5F887F031AEB610000543CFF188898FFFFB5",
|
||||
"atomically { compare-and-swap(32_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicCASLoop {
|
||||
ty: I64,
|
||||
},
|
||||
"BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",
|
||||
"3BFF5FC87F031AEB610000543CFF18C898FFFFB5",
|
||||
"atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicLoad {
|
||||
ty: I8,
|
||||
r_data: writable_xreg(7),
|
||||
r_addr: xreg(28),
|
||||
Inst::LoadAcquire {
|
||||
access_ty: I8,
|
||||
rt: writable_xreg(7),
|
||||
rn: xreg(28),
|
||||
},
|
||||
"BF3B03D587034039",
|
||||
"atomically { x7 = zero_extend_8_bits_at[x28] }",
|
||||
"87FFDF08",
|
||||
"ldarb w7, [x28]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicLoad {
|
||||
ty: I64,
|
||||
r_data: writable_xreg(28),
|
||||
r_addr: xreg(7),
|
||||
Inst::LoadAcquire {
|
||||
access_ty: I16,
|
||||
rt: writable_xreg(2),
|
||||
rn: xreg(3),
|
||||
},
|
||||
"BF3B03D5FC0040F9",
|
||||
"atomically { x28 = zero_extend_64_bits_at[x7] }",
|
||||
"62FCDF48",
|
||||
"ldarh w2, [x3]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicStore {
|
||||
ty: I16,
|
||||
r_data: xreg(17),
|
||||
r_addr: xreg(8),
|
||||
Inst::LoadAcquire {
|
||||
access_ty: I32,
|
||||
rt: writable_xreg(15),
|
||||
rn: xreg(0),
|
||||
},
|
||||
"11010079BF3B03D5",
|
||||
"atomically { 16_bits_at[x8] = x17 }",
|
||||
"0FFCDF88",
|
||||
"ldar w15, [x0]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicStore {
|
||||
ty: I32,
|
||||
r_data: xreg(18),
|
||||
r_addr: xreg(7),
|
||||
Inst::LoadAcquire {
|
||||
access_ty: I64,
|
||||
rt: writable_xreg(28),
|
||||
rn: xreg(7),
|
||||
},
|
||||
"F20000B9BF3B03D5",
|
||||
"atomically { 32_bits_at[x7] = x18 }",
|
||||
"FCFCDFC8",
|
||||
"ldar x28, [x7]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::StoreRelease {
|
||||
access_ty: I8,
|
||||
rt: xreg(7),
|
||||
rn: xreg(28),
|
||||
},
|
||||
"87FF9F08",
|
||||
"stlrb w7, [x28]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::StoreRelease {
|
||||
access_ty: I16,
|
||||
rt: xreg(2),
|
||||
rn: xreg(3),
|
||||
},
|
||||
"62FC9F48",
|
||||
"stlrh w2, [x3]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::StoreRelease {
|
||||
access_ty: I32,
|
||||
rt: xreg(15),
|
||||
rn: xreg(0),
|
||||
},
|
||||
"0FFC9F88",
|
||||
"stlr w15, [x0]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::StoreRelease {
|
||||
access_ty: I64,
|
||||
rt: xreg(28),
|
||||
rn: xreg(7),
|
||||
},
|
||||
"FCFC9FC8",
|
||||
"stlr x28, [x7]",
|
||||
));
|
||||
|
||||
insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
|
||||
|
||||
@@ -789,10 +789,9 @@ pub enum Inst {
|
||||
},
|
||||
|
||||
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
|
||||
/// store-conditional loop. The sequence is both preceded and followed by a fence which is
|
||||
/// at least as comprehensive as that of the `Fence` instruction below. This instruction
|
||||
/// is sequentially consistent. Note that the operand conventions, although very similar
|
||||
/// to AtomicRMW, are different:
|
||||
/// store-conditional loop.
|
||||
/// This instruction is sequentially consistent.
|
||||
/// Note that the operand conventions, although very similar to AtomicRMW, are different:
|
||||
///
|
||||
/// x25 (rd) address
|
||||
/// x26 (rd) expected value
|
||||
@@ -803,22 +802,21 @@ pub enum Inst {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
},
|
||||
|
||||
/// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it
|
||||
/// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as
|
||||
/// that of the `Fence` instruction below. This instruction is sequentially consistent.
|
||||
AtomicLoad {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
r_data: Writable<Reg>,
|
||||
r_addr: Reg,
|
||||
/// Read `access_ty` bits from address `rn`, either 8, 16, 32 or 64-bits, and put
|
||||
/// it in `rt`, optionally zero-extending to fill a word or double word result.
|
||||
/// This instruction is sequentially consistent.
|
||||
LoadAcquire {
|
||||
access_ty: Type, // I8, I16, I32 or I64
|
||||
rt: Writable<Reg>,
|
||||
rn: Reg,
|
||||
},
|
||||
|
||||
/// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence
|
||||
/// instruction following the store. The fence is at least as comprehensive as that of the
|
||||
/// `Fence` instruction below. This instruction is sequentially consistent.
|
||||
AtomicStore {
|
||||
ty: Type, // I8, I16, I32 or I64
|
||||
r_data: Reg,
|
||||
r_addr: Reg,
|
||||
/// Write the lowest `access_ty` bits of `rt` to address `rn`.
|
||||
/// This instruction is sequentially consistent.
|
||||
StoreRelease {
|
||||
access_ty: Type, // I8, I16, I32 or I64
|
||||
rt: Reg,
|
||||
rn: Reg,
|
||||
},
|
||||
|
||||
/// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
|
||||
@@ -1940,13 +1938,13 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(writable_xreg(24));
|
||||
collector.add_def(writable_xreg(27));
|
||||
}
|
||||
&Inst::AtomicLoad { r_data, r_addr, .. } => {
|
||||
collector.add_use(r_addr);
|
||||
collector.add_def(r_data);
|
||||
&Inst::LoadAcquire { rt, rn, .. } => {
|
||||
collector.add_use(rn);
|
||||
collector.add_def(rt);
|
||||
}
|
||||
&Inst::AtomicStore { r_data, r_addr, .. } => {
|
||||
collector.add_use(r_addr);
|
||||
collector.add_use(r_data);
|
||||
&Inst::StoreRelease { rt, rn, .. } => {
|
||||
collector.add_use(rn);
|
||||
collector.add_use(rt);
|
||||
}
|
||||
&Inst::Fence {} => {}
|
||||
&Inst::FpuMove64 { rd, rn } => {
|
||||
@@ -2579,21 +2577,21 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
&mut Inst::AtomicCASLoop { .. } => {
|
||||
// There are no vregs to map in this insn.
|
||||
}
|
||||
&mut Inst::AtomicLoad {
|
||||
ref mut r_data,
|
||||
ref mut r_addr,
|
||||
&mut Inst::LoadAcquire {
|
||||
ref mut rt,
|
||||
ref mut rn,
|
||||
..
|
||||
} => {
|
||||
map_def(mapper, r_data);
|
||||
map_use(mapper, r_addr);
|
||||
map_def(mapper, rt);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::AtomicStore {
|
||||
ref mut r_data,
|
||||
ref mut r_addr,
|
||||
&mut Inst::StoreRelease {
|
||||
ref mut rt,
|
||||
ref mut rn,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, r_data);
|
||||
map_use(mapper, r_addr);
|
||||
map_use(mapper, rt);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::Fence {} => {}
|
||||
&mut Inst::FpuMove64 {
|
||||
@@ -3643,25 +3641,35 @@ impl Inst {
|
||||
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
|
||||
ty.bits())
|
||||
}
|
||||
&Inst::AtomicLoad {
|
||||
ty, r_data, r_addr, ..
|
||||
&Inst::LoadAcquire {
|
||||
access_ty, rt, rn, ..
|
||||
} => {
|
||||
format!(
|
||||
"atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
|
||||
r_data.show_rru(mb_rru),
|
||||
ty.bits(),
|
||||
r_addr.show_rru(mb_rru)
|
||||
)
|
||||
let (op, ty) = match access_ty {
|
||||
I8 => ("ldarb", I32),
|
||||
I16 => ("ldarh", I32),
|
||||
I32 => ("ldar", I32),
|
||||
I64 => ("ldar", I64),
|
||||
_ => panic!("Unsupported type: {}", access_ty),
|
||||
};
|
||||
let size = OperandSize::from_ty(ty);
|
||||
let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
format!("{} {}, [{}]", op, rt, rn)
|
||||
}
|
||||
&Inst::AtomicStore {
|
||||
ty, r_data, r_addr, ..
|
||||
&Inst::StoreRelease {
|
||||
access_ty, rt, rn, ..
|
||||
} => {
|
||||
format!(
|
||||
"atomically {{ {}_bits_at[{}] = {} }}",
|
||||
ty.bits(),
|
||||
r_addr.show_rru(mb_rru),
|
||||
r_data.show_rru(mb_rru)
|
||||
)
|
||||
let (op, ty) = match access_ty {
|
||||
I8 => ("stlrb", I32),
|
||||
I16 => ("stlrh", I32),
|
||||
I32 => ("stlr", I32),
|
||||
I64 => ("stlr", I64),
|
||||
_ => panic!("Unsupported type: {}", access_ty),
|
||||
};
|
||||
let size = OperandSize::from_ty(ty);
|
||||
let rt = show_ireg_sized(rt, mb_rru, size);
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
format!("{} {}, [{}]", op, rt, rn)
|
||||
}
|
||||
&Inst::Fence {} => {
|
||||
format!("dmb ish")
|
||||
|
||||
@@ -1740,6 +1740,22 @@ pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lower the IR instruction `insn`, which must be an `atomic_load`, to a single
/// `Inst::LoadAcquire` that writes its result into `rt`.
///
/// The address is taken from input 0 of `insn` and the access width from the
/// type of its output 0. `rt` is supplied by the caller rather than derived
/// from `insn`, so a caller lowering a `uextend` of the load can pass the
/// extend's own output register.
///
/// Panics if `insn` is not an `AtomicLoad`, or if the access type is rejected
/// by `is_valid_atomic_transaction_ty`.
pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
rt: Writable<Reg>,
|
||||
insn: IRInst,
|
||||
) {
|
||||
assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad);
|
||||
let inputs = insn_inputs(ctx, insn);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let access_ty = ctx.output_ty(insn, 0);
|
||||
assert!(is_valid_atomic_transaction_ty(access_ty));
|
||||
// We're ignoring the result type of the load because the LoadAcquire will
|
||||
// explicitly zero extend to the nearest word, and also zero the high half
|
||||
// of an X register.
|
||||
ctx.emit(Inst::LoadAcquire { access_ty, rt, rn });
|
||||
}
|
||||
|
||||
fn load_op_to_ty(op: Opcode) -> Option<Type> {
|
||||
match op {
|
||||
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8),
|
||||
|
||||
@@ -521,6 +521,19 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Uextend | Opcode::Sextend => {
|
||||
if op == Opcode::Uextend {
|
||||
let inputs = ctx.get_input_as_source_or_const(inputs[0].insn, inputs[0].input);
|
||||
if let Some((atomic_load, 0)) = inputs.inst {
|
||||
if ctx.data(atomic_load).opcode() == Opcode::AtomicLoad {
|
||||
let output_ty = ty.unwrap();
|
||||
assert!(output_ty == I32 || output_ty == I64);
|
||||
let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
emit_atomic_load(ctx, rt, atomic_load);
|
||||
ctx.sink_inst(atomic_load);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
let output_ty = ty.unwrap();
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let from_bits = ty_bits(input_ty) as u8;
|
||||
@@ -1523,27 +1536,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::AtomicLoad => {
|
||||
let r_data = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty_access = ty.unwrap();
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
ctx.emit(Inst::AtomicLoad {
|
||||
ty: ty_access,
|
||||
r_data,
|
||||
r_addr,
|
||||
});
|
||||
let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
emit_atomic_load(ctx, rt, insn);
|
||||
}
|
||||
|
||||
Opcode::AtomicStore => {
|
||||
let r_data = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty_access = ctx.input_ty(insn, 0);
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
ctx.emit(Inst::AtomicStore {
|
||||
ty: ty_access,
|
||||
r_data,
|
||||
r_addr,
|
||||
});
|
||||
let rt = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let access_ty = ctx.input_ty(insn, 0);
|
||||
assert!(is_valid_atomic_transaction_ty(access_ty));
|
||||
ctx.emit(Inst::StoreRelease { access_ty, rt, rn });
|
||||
}
|
||||
|
||||
Opcode::Fence => {
|
||||
|
||||
97
cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
Normal file
97
cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
Normal file
@@ -0,0 +1,97 @@
|
||||
test compile
|
||||
target aarch64
|
||||
|
||||
function %atomic_load_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i64 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: ldar x0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i32(i64) -> i32 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i32 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: ldar w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i16(i64) -> i16 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i16 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: ldarh w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i8(i64) -> i8 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i8 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: ldarb w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i32_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i32 v0
|
||||
v2 = uextend.i64 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ldar w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i16_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i16 v0
|
||||
v2 = uextend.i64 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ldarh w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i8_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i8 v0
|
||||
v2 = uextend.i64 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ldarb w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i16_i32(i64) -> i32 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i16 v0
|
||||
v2 = uextend.i32 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ldarh w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_load_i8_i32(i64) -> i32 {
|
||||
block0(v0: i64):
|
||||
v1 = atomic_load.i8 v0
|
||||
v2 = uextend.i32 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ldarb w0, [x0]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
102
cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
Normal file
102
cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
Normal file
@@ -0,0 +1,102 @@
|
||||
test compile
|
||||
target aarch64
|
||||
|
||||
function %atomic_store_i64(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
atomic_store.i64 v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check: stlr x0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i32(i32, i64) {
|
||||
block0(v0: i32, v1: i64):
|
||||
atomic_store.i32 v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check: stlr w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i16(i16, i64) {
|
||||
block0(v0: i16, v1: i64):
|
||||
atomic_store.i16 v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check: stlrh w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i8(i8, i64) {
|
||||
block0(v0: i8, v1: i64):
|
||||
atomic_store.i8 v0, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check: stlrb w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i64_i32(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = ireduce.i32 v0
|
||||
atomic_store.i32 v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check-not: uxt
|
||||
; check: stlr w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i64_i16(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = ireduce.i16 v0
|
||||
atomic_store.i16 v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check-not: uxt
|
||||
; check: stlrh w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i64_i8(i64, i64) {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = ireduce.i8 v0
|
||||
atomic_store.i8 v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check-not: uxt
|
||||
; check: stlrb w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i32_i16(i32, i64) {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = ireduce.i16 v0
|
||||
atomic_store.i16 v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check-not: uxt
|
||||
; check: stlrh w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %atomic_store_i32_i8(i32, i64) {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = ireduce.i8 v0
|
||||
atomic_store.i8 v2, v1
|
||||
return
|
||||
}
|
||||
|
||||
; check-not: uxt
|
||||
; check: stlrb w0, [x1]
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
Reference in New Issue
Block a user