Re-implement atomic loads and stores
The AArch64 support was a bit broken and was using Armv7-style barriers, which aren't required with Armv8 acquire-release loads/stores. The fallback CAS loops and RMW sequences for AArch64 have also been updated to use acquire-release exclusive instructions, which again removes the need for barriers. The CAS loop has also been further optimised by using the extending form of the cmp instruction. Copyright (c) 2021, Arm Limited.
This commit is contained in:
@@ -4600,8 +4600,7 @@ pub(crate) fn define(
|
|||||||
r#"
|
r#"
|
||||||
Atomically load from memory at `p`.
|
Atomically load from memory at `p`.
|
||||||
|
|
||||||
This is a polymorphic instruction that can load any value type which has a memory
|
It should only be used for integer types with 32 or 64 bits.
|
||||||
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
|
|
||||||
This operation is sequentially consistent and creates happens-before edges that order
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
normal (non-atomic) loads and stores.
|
normal (non-atomic) loads and stores.
|
||||||
"#,
|
"#,
|
||||||
@@ -4613,14 +4612,124 @@ pub(crate) fn define(
|
|||||||
.other_side_effects(true),
|
.other_side_effects(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_uload8",
|
||||||
|
r#"
|
||||||
|
Atomically load 8 bits from memory at `p` and zero-extend to either 32 or 64 bits.
|
||||||
|
|
||||||
|
This is equivalent to ``load.i8`` followed by ``uextend``.
|
||||||
|
|
||||||
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
|
normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.load_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, p])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_uload16",
|
||||||
|
r#"
|
||||||
|
Atomically load 16 bits from memory at `p` and zero-extend to either 32 or 64 bits.
|
||||||
|
|
||||||
|
This is equivalent to ``load.i16`` followed by ``uextend``.
|
||||||
|
|
||||||
|
This operation is sequentially consistent and creates
|
||||||
|
happens-before edges that order normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.load_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, p])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_uload32",
|
||||||
|
r#"
|
||||||
|
Atomically load 32 bits from memory at `p` and zero-extend to 64 bits.
|
||||||
|
|
||||||
|
This is equivalent to ``load.i32`` followed by ``uextend``.
|
||||||
|
|
||||||
|
This operation is sequentially consistent and creates
|
||||||
|
happens-before edges that order normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.load_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, p])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"atomic_store",
|
"atomic_store",
|
||||||
r#"
|
r#"
|
||||||
Atomically store `x` to memory at `p`.
|
Atomically store `x` to memory at `p`.
|
||||||
|
|
||||||
This is a polymorphic instruction that can store any value type with a memory
|
This is a polymorphic instruction that can store a 32 or 64-bit value.
|
||||||
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
|
normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.store_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, x, p])
|
||||||
|
.can_store(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_store8",
|
||||||
|
r#"
|
||||||
|
Atomically store the low 8 bits of `x` to memory at `p`.
|
||||||
|
|
||||||
|
This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
|
||||||
|
|
||||||
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
|
normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.store_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, x, p])
|
||||||
|
.can_store(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_store16",
|
||||||
|
r#"
|
||||||
|
Atomically store the low 16 bits of `x` to memory at `p`.
|
||||||
|
|
||||||
|
This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
|
||||||
|
|
||||||
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
|
normal (non-atomic) loads and stores.
|
||||||
|
"#,
|
||||||
|
&formats.store_no_offset,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, x, p])
|
||||||
|
.can_store(true)
|
||||||
|
.other_side_effects(true),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"atomic_store32",
|
||||||
|
r#"
|
||||||
|
Atomically store the low 32 bits of `x` to memory at `p`.
|
||||||
|
|
||||||
|
This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
|
||||||
|
|
||||||
This operation is sequentially consistent and creates happens-before edges that order
|
This operation is sequentially consistent and creates happens-before edges that order
|
||||||
normal (non-atomic) loads and stores.
|
normal (non-atomic) loads and stores.
|
||||||
"#,
|
"#,
|
||||||
|
|||||||
@@ -498,7 +498,7 @@ fn enc_dmb_ish() -> u32 {
|
|||||||
0xD5033BBF
|
0xD5033BBF
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
let sz = match ty {
|
let sz = match ty {
|
||||||
I64 => 0b11,
|
I64 => 0b11,
|
||||||
I32 => 0b10,
|
I32 => 0b10,
|
||||||
@@ -506,13 +506,13 @@ fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
|||||||
I8 => 0b00,
|
I8 => 0b00,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
0b00001000_01011111_01111100_00000000
|
0b00_001000_1_1_0_11111_1_11111_00000_00000
|
||||||
| (sz << 30)
|
| (sz << 30)
|
||||||
| (machreg_to_gpr(rn) << 5)
|
| (machreg_to_gpr(rn) << 5)
|
||||||
| machreg_to_gpr(rt.to_reg())
|
| machreg_to_gpr(rt.to_reg())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
|
||||||
let sz = match ty {
|
let sz = match ty {
|
||||||
I64 => 0b11,
|
I64 => 0b11,
|
||||||
I32 => 0b10,
|
I32 => 0b10,
|
||||||
@@ -520,7 +520,35 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
|||||||
I8 => 0b00,
|
I8 => 0b00,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
0b00001000_00000000_01111100_00000000
|
0b00_001000_100_11111_1_11111_00000_00000
|
||||||
|
| (sz << 30)
|
||||||
|
| (machreg_to_gpr(rn) << 5)
|
||||||
|
| machreg_to_gpr(rt)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
|
let sz = match ty {
|
||||||
|
I64 => 0b11,
|
||||||
|
I32 => 0b10,
|
||||||
|
I16 => 0b01,
|
||||||
|
I8 => 0b00,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
0b00_001000_0_1_0_11111_1_11111_00000_00000
|
||||||
|
| (sz << 30)
|
||||||
|
| (machreg_to_gpr(rn) << 5)
|
||||||
|
| machreg_to_gpr(rt.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
|
||||||
|
let sz = match ty {
|
||||||
|
I64 => 0b11,
|
||||||
|
I32 => 0b10,
|
||||||
|
I16 => 0b01,
|
||||||
|
I8 => 0b00,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
0b00_001000_000_00000_1_11111_00000_00000
|
||||||
| (sz << 30)
|
| (sz << 30)
|
||||||
| (machreg_to_gpr(rs.to_reg()) << 16)
|
| (machreg_to_gpr(rs.to_reg()) << 16)
|
||||||
| (machreg_to_gpr(rn) << 5)
|
| (machreg_to_gpr(rn) << 5)
|
||||||
@@ -1286,20 +1314,18 @@ impl MachInstEmit for Inst {
|
|||||||
}
|
}
|
||||||
&Inst::AtomicRMW { ty, op } => {
|
&Inst::AtomicRMW { ty, op } => {
|
||||||
/* Emit this:
|
/* Emit this:
|
||||||
dmb ish
|
|
||||||
again:
|
again:
|
||||||
ldxr{,b,h} x/w27, [x25]
|
ldaxr{,b,h} x/w27, [x25]
|
||||||
op x28, x27, x26 // op is add,sub,and,orr,eor
|
op x28, x27, x26 // op is add,sub,and,orr,eor
|
||||||
stxr{,b,h} w24, x/w28, [x25]
|
stlxr{,b,h} w24, x/w28, [x25]
|
||||||
cbnz x24, again
|
cbnz x24, again
|
||||||
dmb ish
|
|
||||||
|
|
||||||
Operand conventions:
|
Operand conventions:
|
||||||
IN: x25 (addr), x26 (2nd arg for op)
|
IN: x25 (addr), x26 (2nd arg for op)
|
||||||
OUT: x27 (old value), x24 (trashed), x28 (trashed)
|
OUT: x27 (old value), x24 (trashed), x28 (trashed)
|
||||||
|
|
||||||
It is unfortunate that, per the ARM documentation, x28 cannot be used for
|
It is unfortunate that, per the ARM documentation, x28 cannot be used for
|
||||||
both the store-data and success-flag operands of stxr. This causes the
|
both the store-data and success-flag operands of stlxr. This causes the
|
||||||
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
|
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
|
||||||
instead for the success-flag.
|
instead for the success-flag.
|
||||||
|
|
||||||
@@ -1320,15 +1346,13 @@ impl MachInstEmit for Inst {
|
|||||||
let x28wr = writable_xreg(28);
|
let x28wr = writable_xreg(28);
|
||||||
let again_label = sink.get_label();
|
let again_label = sink.get_label();
|
||||||
|
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
|
|
||||||
// again:
|
// again:
|
||||||
sink.bind_label(again_label);
|
sink.bind_label(again_label);
|
||||||
let srcloc = state.cur_srcloc();
|
let srcloc = state.cur_srcloc();
|
||||||
if srcloc != SourceLoc::default() {
|
if srcloc != SourceLoc::default() {
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
}
|
}
|
||||||
sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
|
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
|
||||||
|
|
||||||
match op {
|
match op {
|
||||||
AtomicRmwOp::Xchg => {
|
AtomicRmwOp::Xchg => {
|
||||||
@@ -1420,19 +1444,17 @@ impl MachInstEmit for Inst {
|
|||||||
if srcloc != SourceLoc::default() {
|
if srcloc != SourceLoc::default() {
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
}
|
}
|
||||||
sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
|
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||||
|
|
||||||
// cbnz w24, again
|
// cbnz w24, again
|
||||||
// Note, we're actually testing x24, and relying on the default zero-high-half
|
// Note, we're actually testing x24, and relying on the default zero-high-half
|
||||||
// rule in the assignment that `stxr` does.
|
// rule in the assignment that `stlxr` does.
|
||||||
let br_offset = sink.cur_offset();
|
let br_offset = sink.cur_offset();
|
||||||
sink.put4(enc_conditional_br(
|
sink.put4(enc_conditional_br(
|
||||||
BranchTarget::Label(again_label),
|
BranchTarget::Label(again_label),
|
||||||
CondBrKind::NotZero(x24),
|
CondBrKind::NotZero(x24),
|
||||||
));
|
));
|
||||||
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
|
sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
|
||||||
|
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
}
|
}
|
||||||
&Inst::AtomicCAS { rs, rt, rn, ty } => {
|
&Inst::AtomicCAS { rs, rt, rn, ty } => {
|
||||||
let size = match ty {
|
let size = match ty {
|
||||||
@@ -1447,22 +1469,18 @@ impl MachInstEmit for Inst {
|
|||||||
}
|
}
|
||||||
&Inst::AtomicCASLoop { ty } => {
|
&Inst::AtomicCASLoop { ty } => {
|
||||||
/* Emit this:
|
/* Emit this:
|
||||||
dmb ish
|
|
||||||
again:
|
again:
|
||||||
ldxr{,b,h} x/w27, [x25]
|
ldaxr{,b,h} x/w27, [x25]
|
||||||
and x24, x26, MASK (= 2^size_bits - 1)
|
cmp x27, x/w26 uxt{b,h}
|
||||||
cmp x27, x24
|
|
||||||
b.ne out
|
b.ne out
|
||||||
stxr{,b,h} w24, x/w28, [x25]
|
stlxr{,b,h} w24, x/w28, [x25]
|
||||||
cbnz x24, again
|
cbnz x24, again
|
||||||
out:
|
out:
|
||||||
dmb ish
|
|
||||||
|
|
||||||
Operand conventions:
|
Operand conventions:
|
||||||
IN: x25 (addr), x26 (expected value), x28 (replacement value)
|
IN: x25 (addr), x26 (expected value), x28 (replacement value)
|
||||||
OUT: x27 (old value), x24 (trashed)
|
OUT: x27 (old value), x24 (trashed)
|
||||||
*/
|
*/
|
||||||
let xzr = zero_reg();
|
|
||||||
let x24 = xreg(24);
|
let x24 = xreg(24);
|
||||||
let x25 = xreg(25);
|
let x25 = xreg(25);
|
||||||
let x26 = xreg(26);
|
let x26 = xreg(26);
|
||||||
@@ -1474,37 +1492,25 @@ impl MachInstEmit for Inst {
|
|||||||
let again_label = sink.get_label();
|
let again_label = sink.get_label();
|
||||||
let out_label = sink.get_label();
|
let out_label = sink.get_label();
|
||||||
|
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
|
|
||||||
// again:
|
// again:
|
||||||
sink.bind_label(again_label);
|
sink.bind_label(again_label);
|
||||||
let srcloc = state.cur_srcloc();
|
let srcloc = state.cur_srcloc();
|
||||||
if srcloc != SourceLoc::default() {
|
if srcloc != SourceLoc::default() {
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
}
|
}
|
||||||
sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
|
// ldaxr x27, [x25]
|
||||||
|
sink.put4(enc_ldaxr(ty, x27wr, x25));
|
||||||
|
|
||||||
if ty == I64 {
|
// The top 32-bits are zero-extended by the ldaxr so we don't
|
||||||
// mov x24, x26
|
// have to use UXTW, just the x-form of the register.
|
||||||
sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
|
let (bit21, extend_op) = match ty {
|
||||||
} else {
|
I8 => (0b1, 0b000000),
|
||||||
// and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
|
I16 => (0b1, 0b001000),
|
||||||
let (mask, s) = match ty {
|
_ => (0b0, 0b000000),
|
||||||
I8 => (0xFF, 7),
|
|
||||||
I16 => (0xFFFF, 15),
|
|
||||||
I32 => (0xFFFFFFFF, 31),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
};
|
||||||
sink.put4(enc_arith_rr_imml(
|
let bits_31_21 = 0b111_01011_000 | bit21;
|
||||||
0b100_100100,
|
// cmp x27, x26 (== subs xzr, x27, x26)
|
||||||
ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
|
sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
|
||||||
x26,
|
|
||||||
x24wr,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
// cmp x27, x24 (== subs xzr, x27, x24)
|
|
||||||
sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
|
|
||||||
|
|
||||||
// b.ne out
|
// b.ne out
|
||||||
let br_out_offset = sink.cur_offset();
|
let br_out_offset = sink.cur_offset();
|
||||||
@@ -1518,11 +1524,11 @@ impl MachInstEmit for Inst {
|
|||||||
if srcloc != SourceLoc::default() {
|
if srcloc != SourceLoc::default() {
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
}
|
}
|
||||||
sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
|
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||||
|
|
||||||
// cbnz w24, again.
|
// cbnz w24, again.
|
||||||
// Note, we're actually testing x24, and relying on the default zero-high-half
|
// Note, we're actually testing x24, and relying on the default zero-high-half
|
||||||
// rule in the assignment that `stxr` does.
|
// rule in the assignment that `stlxr` does.
|
||||||
let br_again_offset = sink.cur_offset();
|
let br_again_offset = sink.cur_offset();
|
||||||
sink.put4(enc_conditional_br(
|
sink.put4(enc_conditional_br(
|
||||||
BranchTarget::Label(again_label),
|
BranchTarget::Label(again_label),
|
||||||
@@ -1532,46 +1538,12 @@ impl MachInstEmit for Inst {
|
|||||||
|
|
||||||
// out:
|
// out:
|
||||||
sink.bind_label(out_label);
|
sink.bind_label(out_label);
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
}
|
}
|
||||||
&Inst::AtomicLoad { ty, r_data, r_addr } => {
|
&Inst::LoadAcquire { access_ty, rt, rn } => {
|
||||||
let op = match ty {
|
sink.put4(enc_ldar(access_ty, rt, rn));
|
||||||
I8 => 0b0011100001,
|
|
||||||
I16 => 0b0111100001,
|
|
||||||
I32 => 0b1011100001,
|
|
||||||
I64 => 0b1111100001,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
|
|
||||||
let srcloc = state.cur_srcloc();
|
|
||||||
if srcloc != SourceLoc::default() {
|
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
|
||||||
}
|
}
|
||||||
let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
|
&Inst::StoreRelease { access_ty, rt, rn } => {
|
||||||
sink.put4(enc_ldst_uimm12(
|
sink.put4(enc_stlr(access_ty, rt, rn));
|
||||||
op,
|
|
||||||
uimm12scaled_zero,
|
|
||||||
r_addr,
|
|
||||||
r_data.to_reg(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
&Inst::AtomicStore { ty, r_data, r_addr } => {
|
|
||||||
let op = match ty {
|
|
||||||
I8 => 0b0011100000,
|
|
||||||
I16 => 0b0111100000,
|
|
||||||
I32 => 0b1011100000,
|
|
||||||
I64 => 0b1111100000,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let srcloc = state.cur_srcloc();
|
|
||||||
if srcloc != SourceLoc::default() {
|
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
|
||||||
}
|
|
||||||
let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
|
|
||||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
|
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
|
||||||
}
|
}
|
||||||
&Inst::Fence {} => {
|
&Inst::Fence {} => {
|
||||||
sink.put4(enc_dmb_ish()); // dmb ish
|
sink.put4(enc_dmb_ish()); // dmb ish
|
||||||
|
|||||||
@@ -5891,7 +5891,7 @@ fn test_aarch64_binemit() {
|
|||||||
ty: I16,
|
ty: I16,
|
||||||
op: inst_common::AtomicRmwOp::Xor,
|
op: inst_common::AtomicRmwOp::Xor,
|
||||||
},
|
},
|
||||||
"BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
|
"3BFF5F487C031ACA3CFF1848B8FFFFB5",
|
||||||
"atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
"atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||||
));
|
));
|
||||||
|
|
||||||
@@ -5900,7 +5900,7 @@ fn test_aarch64_binemit() {
|
|||||||
ty: I32,
|
ty: I32,
|
||||||
op: inst_common::AtomicRmwOp::Xchg,
|
op: inst_common::AtomicRmwOp::Xchg,
|
||||||
},
|
},
|
||||||
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
|
"3BFF5F88FC031AAA3CFF1888B8FFFFB5",
|
||||||
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
@@ -5947,56 +5947,112 @@ fn test_aarch64_binemit() {
|
|||||||
Inst::AtomicCASLoop {
|
Inst::AtomicCASLoop {
|
||||||
ty: I8,
|
ty: I8,
|
||||||
},
|
},
|
||||||
"BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
|
"3BFF5F087F033AEB610000543CFF180898FFFFB5",
|
||||||
"atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
"atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicCASLoop {
|
||||||
|
ty: I16,
|
||||||
|
},
|
||||||
|
"3BFF5F487F233AEB610000543CFF184898FFFFB5",
|
||||||
|
"atomically { compare-and-swap(16_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::AtomicCASLoop {
|
||||||
|
ty: I32,
|
||||||
|
},
|
||||||
|
"3BFF5F887F031AEB610000543CFF188898FFFFB5",
|
||||||
|
"atomically { compare-and-swap(32_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicCASLoop {
|
Inst::AtomicCASLoop {
|
||||||
ty: I64,
|
ty: I64,
|
||||||
},
|
},
|
||||||
"BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",
|
"3BFF5FC87F031AEB610000543CFF18C898FFFFB5",
|
||||||
"atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
"atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicLoad {
|
Inst::LoadAcquire {
|
||||||
ty: I8,
|
access_ty: I8,
|
||||||
r_data: writable_xreg(7),
|
rt: writable_xreg(7),
|
||||||
r_addr: xreg(28),
|
rn: xreg(28),
|
||||||
},
|
},
|
||||||
"BF3B03D587034039",
|
"87FFDF08",
|
||||||
"atomically { x7 = zero_extend_8_bits_at[x28] }",
|
"ldarb w7, [x28]",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicLoad {
|
Inst::LoadAcquire {
|
||||||
ty: I64,
|
access_ty: I16,
|
||||||
r_data: writable_xreg(28),
|
rt: writable_xreg(2),
|
||||||
r_addr: xreg(7),
|
rn: xreg(3),
|
||||||
},
|
},
|
||||||
"BF3B03D5FC0040F9",
|
"62FCDF48",
|
||||||
"atomically { x28 = zero_extend_64_bits_at[x7] }",
|
"ldarh w2, [x3]",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicStore {
|
Inst::LoadAcquire {
|
||||||
ty: I16,
|
access_ty: I32,
|
||||||
r_data: xreg(17),
|
rt: writable_xreg(15),
|
||||||
r_addr: xreg(8),
|
rn: xreg(0),
|
||||||
},
|
},
|
||||||
"11010079BF3B03D5",
|
"0FFCDF88",
|
||||||
"atomically { 16_bits_at[x8] = x17 }",
|
"ldar w15, [x0]",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AtomicStore {
|
Inst::LoadAcquire {
|
||||||
ty: I32,
|
access_ty: I64,
|
||||||
r_data: xreg(18),
|
rt: writable_xreg(28),
|
||||||
r_addr: xreg(7),
|
rn: xreg(7),
|
||||||
},
|
},
|
||||||
"F20000B9BF3B03D5",
|
"FCFCDFC8",
|
||||||
"atomically { 32_bits_at[x7] = x18 }",
|
"ldar x28, [x7]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::StoreRelease {
|
||||||
|
access_ty: I8,
|
||||||
|
rt: xreg(7),
|
||||||
|
rn: xreg(28),
|
||||||
|
},
|
||||||
|
"87FF9F08",
|
||||||
|
"stlrb w7, [x28]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::StoreRelease {
|
||||||
|
access_ty: I16,
|
||||||
|
rt: xreg(2),
|
||||||
|
rn: xreg(3),
|
||||||
|
},
|
||||||
|
"62FC9F48",
|
||||||
|
"stlrh w2, [x3]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::StoreRelease {
|
||||||
|
access_ty: I32,
|
||||||
|
rt: xreg(15),
|
||||||
|
rn: xreg(0),
|
||||||
|
},
|
||||||
|
"0FFC9F88",
|
||||||
|
"stlr w15, [x0]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::StoreRelease {
|
||||||
|
access_ty: I64,
|
||||||
|
rt: xreg(28),
|
||||||
|
rn: xreg(7),
|
||||||
|
},
|
||||||
|
"FCFC9FC8",
|
||||||
|
"stlr x28, [x7]",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
|
insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
|
||||||
|
|||||||
@@ -789,10 +789,9 @@ pub enum Inst {
|
|||||||
},
|
},
|
||||||
|
|
||||||
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
|
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
|
||||||
/// store-conditional loop. The sequence is both preceded and followed by a fence which is
|
/// store-conditional loop.
|
||||||
/// at least as comprehensive as that of the `Fence` instruction below. This instruction
|
/// This instruction is sequentially consistent.
|
||||||
/// is sequentially consistent. Note that the operand conventions, although very similar
|
/// Note that the operand conventions, although very similar to AtomicRMW, are different:
|
||||||
/// to AtomicRMW, are different:
|
|
||||||
///
|
///
|
||||||
/// x25 (rd) address
|
/// x25 (rd) address
|
||||||
/// x26 (rd) expected value
|
/// x26 (rd) expected value
|
||||||
@@ -803,22 +802,21 @@ pub enum Inst {
|
|||||||
ty: Type, // I8, I16, I32 or I64
|
ty: Type, // I8, I16, I32 or I64
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it
|
/// Read `access_ty` bits from address `rn`, either 8, 16, 32 or 64-bits, and put
|
||||||
/// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as
|
/// it in `rt`, optionally zero-extending to fill a word or double word result.
|
||||||
/// that of the `Fence` instruction below. This instruction is sequentially consistent.
|
/// This instruction is sequentially consistent.
|
||||||
AtomicLoad {
|
LoadAcquire {
|
||||||
ty: Type, // I8, I16, I32 or I64
|
access_ty: Type, // I8, I16, I32 or I64
|
||||||
r_data: Writable<Reg>,
|
rt: Writable<Reg>,
|
||||||
r_addr: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence
|
/// Write the lowest `access_ty` bits of `rt` to address `rn`.
|
||||||
/// instruction following the store. The fence is at least as comprehensive as that of the
|
/// This instruction is sequentially consistent.
|
||||||
/// `Fence` instruction below. This instruction is sequentially consistent.
|
StoreRelease {
|
||||||
AtomicStore {
|
access_ty: Type, // I8, I16, I32 or I64
|
||||||
ty: Type, // I8, I16, I32 or I64
|
rt: Reg,
|
||||||
r_data: Reg,
|
rn: Reg,
|
||||||
r_addr: Reg,
|
|
||||||
},
|
},
|
||||||
|
|
||||||
/// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
|
/// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
|
||||||
@@ -1940,13 +1938,13 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(writable_xreg(24));
|
collector.add_def(writable_xreg(24));
|
||||||
collector.add_def(writable_xreg(27));
|
collector.add_def(writable_xreg(27));
|
||||||
}
|
}
|
||||||
&Inst::AtomicLoad { r_data, r_addr, .. } => {
|
&Inst::LoadAcquire { rt, rn, .. } => {
|
||||||
collector.add_use(r_addr);
|
collector.add_use(rn);
|
||||||
collector.add_def(r_data);
|
collector.add_def(rt);
|
||||||
}
|
}
|
||||||
&Inst::AtomicStore { r_data, r_addr, .. } => {
|
&Inst::StoreRelease { rt, rn, .. } => {
|
||||||
collector.add_use(r_addr);
|
collector.add_use(rn);
|
||||||
collector.add_use(r_data);
|
collector.add_use(rt);
|
||||||
}
|
}
|
||||||
&Inst::Fence {} => {}
|
&Inst::Fence {} => {}
|
||||||
&Inst::FpuMove64 { rd, rn } => {
|
&Inst::FpuMove64 { rd, rn } => {
|
||||||
@@ -2579,21 +2577,21 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
&mut Inst::AtomicCASLoop { .. } => {
|
&mut Inst::AtomicCASLoop { .. } => {
|
||||||
// There are no vregs to map in this insn.
|
// There are no vregs to map in this insn.
|
||||||
}
|
}
|
||||||
&mut Inst::AtomicLoad {
|
&mut Inst::LoadAcquire {
|
||||||
ref mut r_data,
|
ref mut rt,
|
||||||
ref mut r_addr,
|
ref mut rn,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
map_def(mapper, r_data);
|
map_def(mapper, rt);
|
||||||
map_use(mapper, r_addr);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
&mut Inst::AtomicStore {
|
&mut Inst::StoreRelease {
|
||||||
ref mut r_data,
|
ref mut rt,
|
||||||
ref mut r_addr,
|
ref mut rn,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
map_use(mapper, r_data);
|
map_use(mapper, rt);
|
||||||
map_use(mapper, r_addr);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
&mut Inst::Fence {} => {}
|
&mut Inst::Fence {} => {}
|
||||||
&mut Inst::FpuMove64 {
|
&mut Inst::FpuMove64 {
|
||||||
@@ -3643,25 +3641,35 @@ impl Inst {
|
|||||||
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
|
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
|
||||||
ty.bits())
|
ty.bits())
|
||||||
}
|
}
|
||||||
&Inst::AtomicLoad {
|
&Inst::LoadAcquire {
|
||||||
ty, r_data, r_addr, ..
|
access_ty, rt, rn, ..
|
||||||
} => {
|
} => {
|
||||||
format!(
|
let (op, ty) = match access_ty {
|
||||||
"atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
|
I8 => ("ldarb", I32),
|
||||||
r_data.show_rru(mb_rru),
|
I16 => ("ldarh", I32),
|
||||||
ty.bits(),
|
I32 => ("ldar", I32),
|
||||||
r_addr.show_rru(mb_rru)
|
I64 => ("ldar", I64),
|
||||||
)
|
_ => panic!("Unsupported type: {}", access_ty),
|
||||||
|
};
|
||||||
|
let size = OperandSize::from_ty(ty);
|
||||||
|
let rt = show_ireg_sized(rt.to_reg(), mb_rru, size);
|
||||||
|
let rn = rn.show_rru(mb_rru);
|
||||||
|
format!("{} {}, [{}]", op, rt, rn)
|
||||||
}
|
}
|
||||||
&Inst::AtomicStore {
|
&Inst::StoreRelease {
|
||||||
ty, r_data, r_addr, ..
|
access_ty, rt, rn, ..
|
||||||
} => {
|
} => {
|
||||||
format!(
|
let (op, ty) = match access_ty {
|
||||||
"atomically {{ {}_bits_at[{}] = {} }}",
|
I8 => ("stlrb", I32),
|
||||||
ty.bits(),
|
I16 => ("stlrh", I32),
|
||||||
r_addr.show_rru(mb_rru),
|
I32 => ("stlr", I32),
|
||||||
r_data.show_rru(mb_rru)
|
I64 => ("stlr", I64),
|
||||||
)
|
_ => panic!("Unsupported type: {}", access_ty),
|
||||||
|
};
|
||||||
|
let size = OperandSize::from_ty(ty);
|
||||||
|
let rt = show_ireg_sized(rt, mb_rru, size);
|
||||||
|
let rn = rn.show_rru(mb_rru);
|
||||||
|
format!("{} {}, [{}]", op, rt, rn)
|
||||||
}
|
}
|
||||||
&Inst::Fence {} => {
|
&Inst::Fence {} => {
|
||||||
format!("dmb ish")
|
format!("dmb ish")
|
||||||
|
|||||||
@@ -1522,28 +1522,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::AtomicLoad => {
|
Opcode::AtomicLoad
|
||||||
let r_data = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
| Opcode::AtomicUload8
|
||||||
let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
| Opcode::AtomicUload16
|
||||||
let ty_access = ty.unwrap();
|
| Opcode::AtomicUload32 => {
|
||||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
ctx.emit(Inst::AtomicLoad {
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
ty: ty_access,
|
let ty = ty.unwrap();
|
||||||
r_data,
|
let access_ty = match op {
|
||||||
r_addr,
|
Opcode::AtomicLoad => ty,
|
||||||
});
|
Opcode::AtomicUload8 => I8,
|
||||||
|
Opcode::AtomicUload16 => I16,
|
||||||
|
Opcode::AtomicUload32 => I32,
|
||||||
|
_ => panic!(),
|
||||||
|
};
|
||||||
|
assert!(is_valid_atomic_transaction_ty(access_ty));
|
||||||
|
ctx.emit(Inst::LoadAcquire { access_ty, rt, rn });
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::AtomicStore => {
|
Opcode::AtomicStore
|
||||||
let r_data = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
| Opcode::AtomicStore32
|
||||||
let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
| Opcode::AtomicStore16
|
||||||
let ty_access = ctx.input_ty(insn, 0);
|
| Opcode::AtomicStore8 => {
|
||||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
let rt = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
ctx.emit(Inst::AtomicStore {
|
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
ty: ty_access,
|
let ty = ctx.input_ty(insn, 0);
|
||||||
r_data,
|
let access_ty = match op {
|
||||||
r_addr,
|
Opcode::AtomicStore => ty,
|
||||||
});
|
Opcode::AtomicStore32 => I32,
|
||||||
|
Opcode::AtomicStore16 => I16,
|
||||||
|
Opcode::AtomicStore8 => I8,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
assert!(is_valid_atomic_transaction_ty(access_ty));
|
||||||
|
ctx.emit(Inst::StoreRelease { access_ty, rt, rn });
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Fence => {
|
Opcode::Fence => {
|
||||||
|
|||||||
@@ -2734,37 +2734,61 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
|
ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::AtomicLoad => {
|
Opcode::AtomicLoad
|
||||||
|
| Opcode::AtomicUload8
|
||||||
|
| Opcode::AtomicUload16
|
||||||
|
| Opcode::AtomicUload32 => {
|
||||||
let flags = ctx.memflags(insn).unwrap();
|
let flags = ctx.memflags(insn).unwrap();
|
||||||
let endianness = flags.endianness(Endianness::Big);
|
let endianness = flags.endianness(Endianness::Big);
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
assert!(is_valid_atomic_transaction_ty(ty));
|
let access_ty = match op {
|
||||||
|
Opcode::AtomicLoad => ty,
|
||||||
|
Opcode::AtomicUload8 => types::I8,
|
||||||
|
Opcode::AtomicUload16 => types::I16,
|
||||||
|
Opcode::AtomicUload32 => types::I32,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
assert!(is_valid_atomic_transaction_ty(access_ty));
|
||||||
|
|
||||||
let mem = lower_address(ctx, &inputs[..], 0, flags);
|
let mem = lower_address(ctx, &inputs[..], 0, flags);
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
if endianness == Endianness::Big {
|
if endianness == Endianness::Big {
|
||||||
ctx.emit(match ty_bits(ty) {
|
ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
|
||||||
8 => Inst::Load32ZExt8 { rd, mem },
|
(8, 32) => Inst::Load32ZExt8 { rd, mem },
|
||||||
16 => Inst::Load32ZExt16 { rd, mem },
|
(8, 64) => Inst::Load64ZExt8 { rd, mem },
|
||||||
32 => Inst::Load32 { rd, mem },
|
(16, 32) => Inst::Load32ZExt16 { rd, mem },
|
||||||
64 => Inst::Load64 { rd, mem },
|
(16, 64) => Inst::Load64ZExt16 { rd, mem },
|
||||||
|
(32, 32) => Inst::Load32 { rd, mem },
|
||||||
|
(32, 64) => Inst::Load64ZExt32 { rd, mem },
|
||||||
|
(64, 64) => Inst::Load64 { rd, mem },
|
||||||
_ => panic!("Unsupported size in load"),
|
_ => panic!("Unsupported size in load"),
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
ctx.emit(match ty_bits(ty) {
|
ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
|
||||||
8 => Inst::Load32ZExt8 { rd, mem },
|
(8, 32) => Inst::Load32ZExt8 { rd, mem },
|
||||||
16 => Inst::LoadRev16 { rd, mem },
|
(8, 64) => Inst::Load64ZExt8 { rd, mem },
|
||||||
32 => Inst::LoadRev32 { rd, mem },
|
(16, 32) => Inst::LoadRev16 { rd, mem },
|
||||||
64 => Inst::LoadRev64 { rd, mem },
|
(32, 32) => Inst::LoadRev32 { rd, mem },
|
||||||
|
(64, 64) => Inst::LoadRev64 { rd, mem },
|
||||||
_ => panic!("Unsupported size in load"),
|
_ => panic!("Unsupported size in load"),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::AtomicStore => {
|
Opcode::AtomicStore
|
||||||
|
| Opcode::AtomicStore32
|
||||||
|
| Opcode::AtomicStore16
|
||||||
|
| Opcode::AtomicStore8 => {
|
||||||
let flags = ctx.memflags(insn).unwrap();
|
let flags = ctx.memflags(insn).unwrap();
|
||||||
let endianness = flags.endianness(Endianness::Big);
|
let endianness = flags.endianness(Endianness::Big);
|
||||||
let ty = ctx.input_ty(insn, 0);
|
let data_ty = ctx.input_ty(insn, 0);
|
||||||
|
let ty = match op {
|
||||||
|
Opcode::AtomicStore => data_ty,
|
||||||
|
Opcode::AtomicStore32 => types::I32,
|
||||||
|
Opcode::AtomicStore16 => types::I16,
|
||||||
|
Opcode::AtomicStore8 => types::I8,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
assert!(is_valid_atomic_transaction_ty(ty));
|
assert!(is_valid_atomic_transaction_ty(ty));
|
||||||
|
|
||||||
let mem = lower_address(ctx, &inputs[1..], 0, flags);
|
let mem = lower_address(ctx, &inputs[1..], 0, flags);
|
||||||
|
|||||||
@@ -5825,7 +5825,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::AtomicLoad => {
|
Opcode::AtomicLoad
|
||||||
|
| Opcode::AtomicUload8
|
||||||
|
| Opcode::AtomicUload16
|
||||||
|
| Opcode::AtomicUload32 => {
|
||||||
// This is a normal load. The x86-TSO memory model provides sufficient sequencing
|
// This is a normal load. The x86-TSO memory model provides sufficient sequencing
|
||||||
// to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
|
// to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
|
||||||
// need for any fence instructions.
|
// need for any fence instructions.
|
||||||
@@ -5847,11 +5850,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::AtomicStore => {
|
Opcode::AtomicStore
|
||||||
|
| Opcode::AtomicStore32
|
||||||
|
| Opcode::AtomicStore16
|
||||||
|
| Opcode::AtomicStore8 => {
|
||||||
// This is a normal store, followed by an `mfence` instruction.
|
// This is a normal store, followed by an `mfence` instruction.
|
||||||
let data = put_input_in_reg(ctx, inputs[0]);
|
let data = put_input_in_reg(ctx, inputs[0]);
|
||||||
let addr = lower_to_amode(ctx, inputs[1], 0);
|
let addr = lower_to_amode(ctx, inputs[1], 0);
|
||||||
let ty_access = ctx.input_ty(insn, 0);
|
let data_ty = ctx.input_ty(insn, 0);
|
||||||
|
let ty_access = match op {
|
||||||
|
Opcode::AtomicStore => data_ty,
|
||||||
|
Opcode::AtomicStore32 => types::I32,
|
||||||
|
Opcode::AtomicStore16 => types::I16,
|
||||||
|
Opcode::AtomicStore8 => types::I8,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||||
|
|
||||||
ctx.emit(Inst::store(ty_access, data, addr));
|
ctx.emit(Inst::store(ty_access, data, addr));
|
||||||
|
|||||||
72
cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
Normal file
72
cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
test compile
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
function %atomic_load_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_load.i64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldar x0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_load_i32(i64) -> i32 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_load.i32 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldar w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_uload_i32_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_uload32.i64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldar w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_uload_i16_i32(i64) -> i32 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_uload16.i32 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldarh w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_uload_i16_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_uload16.i64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldarh w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_uload_i8_i32(i64) -> i32 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_uload8.i32 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldarb w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_uload_i8_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = atomic_uload8.i64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ldarb w0, [x0]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
72
cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
Normal file
72
cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
test compile
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
function %atomic_store_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
atomic_store.i64 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlr x0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_store_i32(i32, i64) {
|
||||||
|
block0(v0: i32, v1: i64):
|
||||||
|
atomic_store.i32 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlr w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_ustore_i32_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
atomic_store32.i64 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlr w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_ustore_i16_i32(i32, i64) {
|
||||||
|
block0(v0: i32, v1: i64):
|
||||||
|
atomic_store16.i32 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlrh w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_ustore_i16_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
atomic_store16.i64 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlrh w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_ustore_i8_i32(i32, i64) {
|
||||||
|
block0(v0: i32, v1: i64):
|
||||||
|
atomic_store8.i32 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlrb w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %atomic_ustore_i8_i64(i64, i64) {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
atomic_store8.i64 v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stlrb w0, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
@@ -41,29 +41,29 @@ block0:
|
|||||||
; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
|
; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i16(i64) -> i16 {
|
function %atomic_load_i16(i64) -> i32 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = atomic_load.i16 little v0
|
v1 = atomic_uload16.i32 little v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
; check: lrvh %r2, 0(%r2)
|
; check: lrvh %r2, 0(%r2)
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i16_sym() -> i16 {
|
function %atomic_load_i16_sym() -> i32 {
|
||||||
gv0 = symbol colocated %sym
|
gv0 = symbol colocated %sym
|
||||||
block0:
|
block0:
|
||||||
v0 = symbol_value.i64 gv0
|
v0 = symbol_value.i64 gv0
|
||||||
v1 = atomic_load.i16 little v0
|
v1 = atomic_uload16.i32 little v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
|
; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i8(i64) -> i8 {
|
function %atomic_load_i8(i64) -> i32 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = atomic_load.i8 little v0
|
v1 = atomic_uload8.i32 little v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -41,29 +41,29 @@ block0:
|
|||||||
; check: lrl %r2, %sym + 0
|
; check: lrl %r2, %sym + 0
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i16(i64) -> i16 {
|
function %atomic_load_i16(i64) -> i32 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = atomic_load.i16 v0
|
v1 = atomic_uload16.i32 v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
; check: llh %r2, 0(%r2)
|
; check: llh %r2, 0(%r2)
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i16_sym() -> i16 {
|
function %atomic_load_i16_sym() -> i32 {
|
||||||
gv0 = symbol colocated %sym
|
gv0 = symbol colocated %sym
|
||||||
block0:
|
block0:
|
||||||
v0 = symbol_value.i64 gv0
|
v0 = symbol_value.i64 gv0
|
||||||
v1 = atomic_load.i16 v0
|
v1 = atomic_uload16.i32 v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
; check: llhrl %r2, %sym + 0
|
; check: llhrl %r2, %sym + 0
|
||||||
; nextln: br %r14
|
; nextln: br %r14
|
||||||
|
|
||||||
function %atomic_load_i8(i64) -> i8 {
|
function %atomic_load_i8(i64) -> i32 {
|
||||||
block0(v0: i64):
|
block0(v0: i64):
|
||||||
v1 = atomic_load.i8 v0
|
v1 = atomic_uload8.i32 v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -625,8 +625,14 @@ where
|
|||||||
Opcode::Iconcat => assign(Value::concat(arg(0)?, arg(1)?)?),
|
Opcode::Iconcat => assign(Value::concat(arg(0)?, arg(1)?)?),
|
||||||
Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
|
Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
|
||||||
Opcode::AtomicCas => unimplemented!("AtomicCas"),
|
Opcode::AtomicCas => unimplemented!("AtomicCas"),
|
||||||
Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
|
Opcode::AtomicLoad
|
||||||
Opcode::AtomicStore => unimplemented!("AtomicStore"),
|
| Opcode::AtomicUload8
|
||||||
|
| Opcode::AtomicUload16
|
||||||
|
| Opcode::AtomicUload32 => unimplemented!("AtomicLoad"),
|
||||||
|
Opcode::AtomicStore
|
||||||
|
| Opcode::AtomicStore8
|
||||||
|
| Opcode::AtomicStore16
|
||||||
|
| Opcode::AtomicStore32 => unimplemented!("AtomicStore"),
|
||||||
Opcode::Fence => unimplemented!("Fence"),
|
Opcode::Fence => unimplemented!("Fence"),
|
||||||
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
||||||
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
||||||
|
|||||||
Reference in New Issue
Block a user