[AArch64] Improve AtomicRMWLoop (#3839)
Add more tests, use accurate disassembly, respect data sizes and simplify the Xchg implementation. Copyright (c) 2022, Arm Limited
This commit is contained in:
@@ -1338,10 +1338,6 @@ impl MachInstEmit for Inst {
|
||||
both the store-data and success-flag operands of stlxr. This causes the
|
||||
instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
|
||||
instead for the success-flag.
|
||||
|
||||
In the case where the operation is 'xchg', the second insn is instead
|
||||
mov x28, x26
|
||||
so that we simply write in the destination, the "2nd arg for op".
|
||||
*/
|
||||
// TODO: We should not hardcode registers here; a better idea would be to
|
||||
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
|
||||
@@ -1363,19 +1359,17 @@ impl MachInstEmit for Inst {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
|
||||
let size = OperandSize::from_ty(ty);
|
||||
|
||||
match op {
|
||||
AtomicRmwOp::Xchg => {
|
||||
// mov x28, x26
|
||||
Inst::Mov64 { rd: x28wr, rm: x26 }.emit(sink, emit_info, state);
|
||||
}
|
||||
AtomicRmwOp::Xchg => {} // do nothing
|
||||
AtomicRmwOp::Nand => {
|
||||
// and x28, x27, x26
|
||||
// mvn x28, x28
|
||||
|
||||
Inst::AluRRR {
|
||||
alu_op: ALUOp::And,
|
||||
size: OperandSize::Size64,
|
||||
size,
|
||||
rd: x28wr,
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
@@ -1384,7 +1378,7 @@ impl MachInstEmit for Inst {
|
||||
|
||||
Inst::AluRRR {
|
||||
alu_op: ALUOp::OrrNot,
|
||||
size: OperandSize::Size64,
|
||||
size,
|
||||
rd: x28wr,
|
||||
rn: xzr,
|
||||
rm: x28,
|
||||
@@ -1408,7 +1402,7 @@ impl MachInstEmit for Inst {
|
||||
|
||||
Inst::AluRRR {
|
||||
alu_op: ALUOp::SubS,
|
||||
size: OperandSize::from_ty(ty),
|
||||
size,
|
||||
rd: writable_zero_reg(),
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
@@ -1441,7 +1435,7 @@ impl MachInstEmit for Inst {
|
||||
|
||||
Inst::AluRRR {
|
||||
alu_op,
|
||||
size: OperandSize::Size64,
|
||||
size,
|
||||
rd: x28wr,
|
||||
rn: x27,
|
||||
rm: x26,
|
||||
@@ -1454,7 +1448,11 @@ impl MachInstEmit for Inst {
|
||||
if srcloc != SourceLoc::default() {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||
if op == AtomicRmwOp::Xchg {
|
||||
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
|
||||
} else {
|
||||
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
|
||||
}
|
||||
|
||||
// cbnz w24, again
|
||||
// Note, we're actually testing x24, and relying on the default zero-high-half
|
||||
|
||||
@@ -6105,8 +6105,80 @@ fn test_aarch64_binemit() {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Xor,
|
||||
},
|
||||
"3BFF5F487C031ACA3CFF1848B8FFFFB5",
|
||||
"atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||
"3BFF5F487C031A4A3CFF1848B8FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Add,
|
||||
},
|
||||
"3BFF5F087C031A0B3CFF1808B8FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Or,
|
||||
},
|
||||
"3BFF5F887C031A2A3CFF1888B8FFFFB5",
|
||||
"1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I64,
|
||||
op: inst_common::AtomicRmwOp::And,
|
||||
},
|
||||
"3BFF5FC87C031A8A3CFF18C8B8FFFFB5",
|
||||
"1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
},
|
||||
"3BFF5F083AFF1808D8FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Nand,
|
||||
},
|
||||
"3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Smin,
|
||||
},
|
||||
"3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5",
|
||||
"1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I64,
|
||||
op: inst_common::AtomicRmwOp::Smax,
|
||||
},
|
||||
"3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5",
|
||||
"1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I8,
|
||||
op: inst_common::AtomicRmwOp::Umin,
|
||||
},
|
||||
"3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5",
|
||||
"1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I16,
|
||||
op: inst_common::AtomicRmwOp::Umax,
|
||||
},
|
||||
"3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5",
|
||||
"1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
@@ -6462,14 +6534,6 @@ fn test_aarch64_binemit() {
|
||||
"lduminal x25, x26, [x27]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::AtomicRMWLoop {
|
||||
ty: I32,
|
||||
op: inst_common::AtomicRmwOp::Xchg,
|
||||
},
|
||||
"3BFF5F88FC031AAA3CFF1888B8FFFFB5",
|
||||
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
|
||||
));
|
||||
insns.push((
|
||||
Inst::AtomicCAS {
|
||||
rs: writable_xreg(28),
|
||||
|
||||
@@ -688,12 +688,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
&Inst::CCmpImm { rn, .. } => {
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::AtomicRMWLoop { .. } => {
|
||||
&Inst::AtomicRMWLoop { op, .. } => {
|
||||
collector.add_use(xreg(25));
|
||||
collector.add_use(xreg(26));
|
||||
collector.add_def(writable_xreg(24));
|
||||
collector.add_def(writable_xreg(27));
|
||||
collector.add_def(writable_xreg(28));
|
||||
if op != AtomicRmwOp::Xchg {
|
||||
collector.add_def(writable_xreg(28));
|
||||
}
|
||||
}
|
||||
&Inst::AtomicRMW { rs, rt, rn, .. } => {
|
||||
collector.add_use(rs);
|
||||
@@ -2399,9 +2401,60 @@ impl Inst {
|
||||
format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn)
|
||||
}
|
||||
&Inst::AtomicRMWLoop { ty, op, .. } => {
|
||||
format!(
|
||||
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
|
||||
ty.bits(), op)
|
||||
let ty_suffix = match ty {
|
||||
I8 => "b",
|
||||
I16 => "h",
|
||||
_ => "",
|
||||
};
|
||||
let size = OperandSize::from_ty(ty);
|
||||
let r_status = show_ireg_sized(xreg(24), mb_rru, OperandSize::Size32);
|
||||
let r_arg2 = show_ireg_sized(xreg(26), mb_rru, size);
|
||||
let r_tmp = show_ireg_sized(xreg(27), mb_rru, size);
|
||||
let mut r_dst = show_ireg_sized(xreg(28), mb_rru, size);
|
||||
|
||||
let mut loop_str: String = "1: ".to_string();
|
||||
loop_str.push_str(&format!("ldaxr{} {}, [x25]; ", ty_suffix, r_tmp));
|
||||
|
||||
let op_str = match op {
|
||||
inst_common::AtomicRmwOp::Add => "add",
|
||||
inst_common::AtomicRmwOp::Sub => "sub",
|
||||
inst_common::AtomicRmwOp::Xor => "eor",
|
||||
inst_common::AtomicRmwOp::Or => "orr",
|
||||
inst_common::AtomicRmwOp::And => "and",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
if op_str.is_empty() {
|
||||
match op {
|
||||
inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2,
|
||||
inst_common::AtomicRmwOp::Nand => {
|
||||
loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
|
||||
loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
|
||||
}
|
||||
_ => {
|
||||
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
|
||||
let cond = match op {
|
||||
inst_common::AtomicRmwOp::Smin => "lt",
|
||||
inst_common::AtomicRmwOp::Smax => "gt",
|
||||
inst_common::AtomicRmwOp::Umin => "lo",
|
||||
inst_common::AtomicRmwOp::Umax => "hi",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
loop_str.push_str(&format!(
|
||||
"csel {}, {}, {}, {}; ",
|
||||
r_dst, r_tmp, r_arg2, cond
|
||||
));
|
||||
}
|
||||
};
|
||||
} else {
|
||||
loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2));
|
||||
}
|
||||
loop_str.push_str(&format!(
|
||||
"stlxr{} {}, {}, [x25]; ",
|
||||
ty_suffix, r_status, r_dst
|
||||
));
|
||||
loop_str.push_str(&format!("cbnz {}, 1b", r_status));
|
||||
loop_str
|
||||
}
|
||||
&Inst::AtomicCAS { rs, rt, rn, ty } => {
|
||||
let op = match ty {
|
||||
|
||||
Reference in New Issue
Block a user