[AArch64] Port atomic rmw to ISLE (#4021)

Also fix and extend the current implementation:
- AtomicRMWOp::Clr != AtomicRmwOp::And, as the input needs to be
  inverted first.
- Inputs to the cmp for the RMWLoop case are sign-extended when
  needed.
- Lower Xchg to Swp.
- Lower Sub to Add with a negated input.
- Added more runtests.

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Sam Parker
2022-04-27 21:13:59 +01:00
committed by GitHub
parent 8381179503
commit 12b4374cd5
26 changed files with 1632 additions and 1281 deletions

View File

@@ -583,6 +583,13 @@ impl OperandSize {
}
}
pub fn bits(&self) -> u8 {
match self {
OperandSize::Size32 => 32,
OperandSize::Size64 => 64,
}
}
/// Convert from an integer type into the smallest size that fits.
pub fn from_ty(ty: Type) -> OperandSize {
debug_assert!(!ty.is_vector());

View File

@@ -7,6 +7,7 @@ use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::is_valid_atomic_transaction_ty;
use crate::machinst::{ty_bits, Reg, RegClass, Writable};
use core::convert::TryFrom;
@@ -505,7 +506,7 @@ fn enc_dmb_ish() -> u32 {
0xD5033BBF
}
fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
assert!(machreg_to_gpr(rt.to_reg()) != 31);
let sz = match ty {
I64 => 0b11,
@@ -514,6 +515,10 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
I8 => 0b00,
_ => unreachable!(),
};
let bit15 = match op {
AtomicRMWOp::Swp => 0b1,
_ => 0b0,
};
let op = match op {
AtomicRMWOp::Add => 0b000,
AtomicRMWOp::Clr => 0b001,
@@ -523,10 +528,12 @@ fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u
AtomicRMWOp::Smin => 0b101,
AtomicRMWOp::Umax => 0b110,
AtomicRMWOp::Umin => 0b111,
AtomicRMWOp::Swp => 0b000,
};
0b00_111_000_111_00000_0_000_00_00000_00000
| (sz << 30)
| (machreg_to_gpr(rs) << 16)
| bit15 << 15
| (op << 12)
| (machreg_to_gpr(rn) << 5)
| machreg_to_gpr(rt.to_reg())
@@ -1371,15 +1378,18 @@ impl MachInstEmit for Inst {
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
}
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
assert!(is_valid_atomic_transaction_ty(ty));
let rs = allocs.next(rs);
let rt = allocs.next_writable(rt);
let rn = allocs.next(rn);
sink.put4(enc_ldal(ty, op, rs, rt, rn));
sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
}
&Inst::AtomicRMWLoop { ty, op } => {
assert!(is_valid_atomic_transaction_ty(ty));
/* Emit this:
again:
ldaxr{,b,h} x/w27, [x25]
// maybe sign extend
op x28, x27, x26 // op is add,sub,and,orr,eor
stlxr{,b,h} w24, x/w28, [x25]
cbnz x24, again
@@ -1414,10 +1424,31 @@ impl MachInstEmit for Inst {
}
sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
let size = OperandSize::from_ty(ty);
let sign_ext = match op {
AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
I16 => Some((ExtendOp::SXTH, 16)),
I8 => Some((ExtendOp::SXTB, 8)),
_ => None,
},
_ => None,
};
// sxt{b|h} the loaded result if necessary.
if sign_ext.is_some() {
let (_, from_bits) = sign_ext.unwrap();
Inst::Extend {
rd: x27wr,
rn: x27,
signed: true,
from_bits,
to_bits: size.bits(),
}
.emit(&[], sink, emit_info, state);
}
match op {
AtomicRmwOp::Xchg => {} // do nothing
AtomicRmwOp::Nand => {
AtomicRMWLoopOp::Xchg => {} // do nothing
AtomicRMWLoopOp::Nand => {
// and x28, x27, x26
// mvn x28, x28
@@ -1439,29 +1470,42 @@ impl MachInstEmit for Inst {
}
.emit(&[], sink, emit_info, state);
}
AtomicRmwOp::Umin
| AtomicRmwOp::Umax
| AtomicRmwOp::Smin
| AtomicRmwOp::Smax => {
// cmp x27, x26
AtomicRMWLoopOp::Umin
| AtomicRMWLoopOp::Umax
| AtomicRMWLoopOp::Smin
| AtomicRMWLoopOp::Smax => {
// cmp x27, x26 {?sxt}
// csel.op x28, x27, x26
let cond = match op {
AtomicRmwOp::Umin => Cond::Lo,
AtomicRmwOp::Umax => Cond::Hi,
AtomicRmwOp::Smin => Cond::Lt,
AtomicRmwOp::Smax => Cond::Gt,
AtomicRMWLoopOp::Umin => Cond::Lo,
AtomicRMWLoopOp::Umax => Cond::Hi,
AtomicRMWLoopOp::Smin => Cond::Lt,
AtomicRMWLoopOp::Smax => Cond::Gt,
_ => unreachable!(),
};
Inst::AluRRR {
alu_op: ALUOp::SubS,
size,
rd: writable_zero_reg(),
rn: x27,
rm: x26,
if sign_ext.is_some() {
let (extendop, _) = sign_ext.unwrap();
Inst::AluRRRExtend {
alu_op: ALUOp::SubS,
size,
rd: writable_zero_reg(),
rn: x27,
rm: x26,
extendop,
}
.emit(&[], sink, emit_info, state);
} else {
Inst::AluRRR {
alu_op: ALUOp::SubS,
size,
rd: writable_zero_reg(),
rn: x27,
rm: x26,
}
.emit(&[], sink, emit_info, state);
}
.emit(&[], sink, emit_info, state);
Inst::CSel {
cond,
@@ -1474,17 +1518,17 @@ impl MachInstEmit for Inst {
_ => {
// add/sub/and/orr/eor x28, x27, x26
let alu_op = match op {
AtomicRmwOp::Add => ALUOp::Add,
AtomicRmwOp::Sub => ALUOp::Sub,
AtomicRmwOp::And => ALUOp::And,
AtomicRmwOp::Or => ALUOp::Orr,
AtomicRmwOp::Xor => ALUOp::Eor,
AtomicRmwOp::Nand
| AtomicRmwOp::Umin
| AtomicRmwOp::Umax
| AtomicRmwOp::Smin
| AtomicRmwOp::Smax
| AtomicRmwOp::Xchg => unreachable!(),
AtomicRMWLoopOp::Add => ALUOp::Add,
AtomicRMWLoopOp::Sub => ALUOp::Sub,
AtomicRMWLoopOp::And => ALUOp::And,
AtomicRMWLoopOp::Orr => ALUOp::Orr,
AtomicRMWLoopOp::Eor => ALUOp::Eor,
AtomicRMWLoopOp::Nand
| AtomicRMWLoopOp::Umin
| AtomicRMWLoopOp::Umax
| AtomicRMWLoopOp::Smin
| AtomicRMWLoopOp::Smax
| AtomicRMWLoopOp::Xchg => unreachable!(),
};
Inst::AluRRR {
@@ -1502,7 +1546,7 @@ impl MachInstEmit for Inst {
if srcloc != SourceLoc::default() {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
if op == AtomicRmwOp::Xchg {
if op == AtomicRMWLoopOp::Xchg {
sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
} else {
sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]

View File

@@ -6205,10 +6205,18 @@ fn test_aarch64_binemit() {
"frintn d23, d24",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Sub,
},
"3BFF5F087C031A4B3CFF1808B8FFFFB5",
"1: ldaxrb w27, [x25]; sub w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: inst_common::AtomicRmwOp::Xor,
op: AtomicRMWLoopOp::Eor,
},
"3BFF5F487C031A4A3CFF1848B8FFFFB5",
"1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6216,7 +6224,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: inst_common::AtomicRmwOp::Add,
op: AtomicRMWLoopOp::Add,
},
"3BFF5F087C031A0B3CFF1808B8FFFFB5",
"1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6224,7 +6232,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I32,
op: inst_common::AtomicRmwOp::Or,
op: AtomicRMWLoopOp::Orr,
},
"3BFF5F887C031A2A3CFF1888B8FFFFB5",
"1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6232,7 +6240,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I64,
op: inst_common::AtomicRmwOp::And,
op: AtomicRMWLoopOp::And,
},
"3BFF5FC87C031A8A3CFF18C8B8FFFFB5",
"1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6240,7 +6248,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: inst_common::AtomicRmwOp::Xchg,
op: AtomicRMWLoopOp::Xchg,
},
"3BFF5F083AFF1808D8FFFFB5",
"1: ldaxrb w27, [x25]; stlxrb w24, w26, [x25]; cbnz w24, 1b",
@@ -6248,15 +6256,23 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: inst_common::AtomicRmwOp::Nand,
op: AtomicRMWLoopOp::Nand,
},
"3BFF5F487C031A0AFC033C2A3CFF184898FFFFB5",
"1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: AtomicRMWLoopOp::Smin,
},
"3BFF5F487B3F00137FA33A6B7CB39A9A3CFF184878FFFFB5",
"1: ldaxrh w27, [x25]; sxth w27, w27; cmp w27, w26, sxth; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I32,
op: inst_common::AtomicRmwOp::Smin,
op: AtomicRMWLoopOp::Smin,
},
"3BFF5F887F031A6B7CB39A9A3CFF188898FFFFB5",
"1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b",
@@ -6264,7 +6280,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I64,
op: inst_common::AtomicRmwOp::Smax,
op: AtomicRMWLoopOp::Smax,
},
"3BFF5FC87F031AEB7CC39A9A3CFF18C898FFFFB5",
"1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b",
@@ -6272,7 +6288,15 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: inst_common::AtomicRmwOp::Umin,
op: AtomicRMWLoopOp::Smax,
},
"3BFF5F087B1F00137F833A6B7CC39A9A3CFF180878FFFFB5",
"1: ldaxrb w27, [x25]; sxtb w27, w27; cmp w27, w26, sxtb; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b",
));
insns.push((
Inst::AtomicRMWLoop {
ty: I8,
op: AtomicRMWLoopOp::Umin,
},
"3BFF5F087F031A6B7C339A9A3CFF180898FFFFB5",
"1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b",
@@ -6280,7 +6304,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::AtomicRMWLoop {
ty: I16,
op: inst_common::AtomicRmwOp::Umax,
op: AtomicRMWLoopOp::Umax,
},
"3BFF5F487F031A6B7C839A9A3CFF184898FFFFB5",
"1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b",
@@ -6638,6 +6662,50 @@ fn test_aarch64_binemit() {
"7A73F9F8",
"lduminal x25, x26, [x27]",
));
insns.push((
Inst::AtomicRMW {
ty: I8,
op: AtomicRMWOp::Swp,
rs: xreg(28),
rt: writable_xreg(29),
rn: xreg(30),
},
"DD83FC38",
"swpalb w28, fp, [lr]",
));
insns.push((
Inst::AtomicRMW {
ty: I16,
op: AtomicRMWOp::Swp,
rs: xreg(0),
rt: writable_xreg(1),
rn: xreg(2),
},
"4180E078",
"swpalh w0, w1, [x2]",
));
insns.push((
Inst::AtomicRMW {
ty: I32,
op: AtomicRMWOp::Swp,
rs: xreg(3),
rt: writable_xreg(4),
rn: xreg(5),
},
"A480E3B8",
"swpal w3, w4, [x5]",
));
insns.push((
Inst::AtomicRMW {
ty: I64,
op: AtomicRMWOp::Swp,
rs: xreg(6),
rt: writable_xreg(7),
rn: xreg(8),
},
"0781E6F8",
"swpal x6, x7, [x8]",
));
insns.push((
Inst::AtomicCAS {

View File

@@ -39,9 +39,9 @@ mod emit_tests;
// Instructions (top level): definition
pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp,
IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp,
VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode,
FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2,
VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp,
};
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -676,12 +676,14 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
&Inst::CCmpImm { rn, .. } => {
collector.reg_use(rn);
}
&Inst::AtomicRMWLoop { .. } => {
&Inst::AtomicRMWLoop { op, .. } => {
collector.reg_use(xreg(25));
collector.reg_use(xreg(26));
collector.reg_def(writable_xreg(24));
collector.reg_def(writable_xreg(27));
collector.reg_def(writable_xreg(28));
if op != AtomicRMWLoopOp::Xchg {
collector.reg_def(writable_xreg(28));
}
}
&Inst::AtomicRMW { rs, rt, rn, .. } => {
collector.reg_use(rs);
@@ -1538,6 +1540,7 @@ impl Inst {
AtomicRMWOp::Umax => "ldumaxal",
AtomicRMWOp::Smin => "ldsminal",
AtomicRMWOp::Umin => "lduminal",
AtomicRMWOp::Swp => "swpal",
};
let size = OperandSize::from_ty(ty);
@@ -1569,28 +1572,39 @@ impl Inst {
loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr));
let op_str = match op {
inst_common::AtomicRmwOp::Add => "add",
inst_common::AtomicRmwOp::Sub => "sub",
inst_common::AtomicRmwOp::Xor => "eor",
inst_common::AtomicRmwOp::Or => "orr",
inst_common::AtomicRmwOp::And => "and",
AtomicRMWLoopOp::Add => "add",
AtomicRMWLoopOp::Sub => "sub",
AtomicRMWLoopOp::Eor => "eor",
AtomicRMWLoopOp::Orr => "orr",
AtomicRMWLoopOp::And => "and",
_ => "",
};
if op_str.is_empty() {
match op {
inst_common::AtomicRmwOp::Xchg => r_dst = r_arg2,
inst_common::AtomicRmwOp::Nand => {
AtomicRMWLoopOp::Xchg => r_dst = r_arg2,
AtomicRMWLoopOp::Nand => {
loop_str.push_str(&format!("and {}, {}, {}; ", r_dst, r_tmp, r_arg2));
loop_str.push_str(&format!("mvn {}, {}; ", r_dst, r_dst));
}
_ => {
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
if (op == AtomicRMWLoopOp::Smin || op == AtomicRMWLoopOp::Smax)
&& (ty == I8 || ty == I16)
{
loop_str
.push_str(&format!("sxt{} {}, {}; ", ty_suffix, r_tmp, r_tmp));
loop_str.push_str(&format!(
"cmp {}, {}, sxt{}; ",
r_tmp, r_arg2, ty_suffix
));
} else {
loop_str.push_str(&format!("cmp {}, {}; ", r_tmp, r_arg2));
}
let cond = match op {
inst_common::AtomicRmwOp::Smin => "lt",
inst_common::AtomicRmwOp::Smax => "gt",
inst_common::AtomicRmwOp::Umin => "lo",
inst_common::AtomicRmwOp::Umax => "hi",
AtomicRMWLoopOp::Smin => "lt",
AtomicRMWLoopOp::Smax => "gt",
AtomicRMWLoopOp::Umin => "lo",
AtomicRMWLoopOp::Umax => "hi",
_ => unreachable!(),
};
loop_str.push_str(&format!(