x64: port atomic_rmw to ISLE (#4389)

* x64: port `atomic_rmw` to ISLE

This change ports `atomic_rmw` to ISLE for the x64 backend. It does not
change the lowering in any way, though it seems possible that the fixed
registers need not be as fixed and that there are opportunities for
single-instruction lowerings. It also renames `inst_common::AtomicRmwOp` to
`MachAtomicRmwOp` to disambiguate it from the IR enum of the same name.

* x64: remove remaining hardcoded register constraints for `atomic_rmw`

* x64: use `SyntheticAmode` in `AtomicRmwSeq`

* review: add missing reg collector for amode

* review: collect memory registers in the 'late' phase
Author:     Andrew Brown
Date:       2022-07-06 16:58:59 -07:00
Committer:  GitHub
Parent:     f98076ae88
Commit:     8629cbc6a4

10 changed files with 196 additions and 172 deletions

------------------------------------------------------------------------

@@ -415,10 +415,10 @@
 ;; The sequence consists of an initial "normal" load from `dst`, followed
 ;; by a loop which computes the new value and tries to compare-and-swap
 ;; ("CAS") it into `dst`, using the native instruction `lock
-;; cmpxchg{b,w,l,q}` . The loop iterates until the CAS is successful.
-;; If there is no contention, there will be only one pass through the
-;; loop body. The sequence does *not* perform any explicit memory fence
-;; instructions (mfence/sfence/lfence).
+;; cmpxchg{b,w,l,q}`. The loop iterates until the CAS is successful. If
+;; there is no contention, there will be only one pass through the loop
+;; body. The sequence does *not* perform any explicit memory fence
+;; instructions (`mfence`/`sfence`/`lfence`).
 ;;
 ;; Note that the transaction is atomic in the sense that, as observed by
 ;; some other thread, `dst` either has the initial or final value, but no
@@ -430,15 +430,12 @@
 ;; problem.
 ;;
 ;; This instruction sequence has fixed register uses as follows:
-;;
-;; %r9 (read) address
-;; %r10 (read) second operand for `op`
-;; %r11 (written) scratch reg; value afterwards has no meaning
-;; %rax (written) the old value at %r9
-;; %rflags is written. Do not assume anything about it after the instruction.
+;; - %rax (written) the old value at `mem`
+;; - %rflags is written. Do not assume anything about it after the
+;;   instruction.
 (AtomicRmwSeq (ty Type) ;; I8, I16, I32, or I64
-              (op AtomicRmwOp)
-              (address Reg)
+              (op MachAtomicRmwOp)
+              (mem SyntheticAmode)
               (operand Reg)
               (temp WritableReg)
               (dst_old WritableReg))
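
For reference, the semantics of this pseudo-instruction are the classic
CAS-loop idiom. Here is a minimal Rust sketch of that loop (an illustration
only, not the code the backend emits; `atomic_rmw_seq`, `AtomicU64`, and the
`op` closure are stand-ins chosen for this example):

    use std::sync::atomic::{AtomicU64, Ordering};

    // Load the old value, compute the new one, and retry the compare-and-swap
    // until no other thread has modified `mem` in between.
    fn atomic_rmw_seq(mem: &AtomicU64, operand: u64, op: impl Fn(u64, u64) -> u64) -> u64 {
        let mut old = mem.load(Ordering::Relaxed);
        loop {
            let new = op(old, operand);
            match mem.compare_exchange(old, new, Ordering::SeqCst, Ordering::Relaxed) {
                Ok(prev) => return prev,     // success: one pass if uncontended
                Err(actual) => old = actual, // contention: retry with the revised old value
            }
        }
    }

On failure, `compare_exchange` hands back the freshly observed value, which
corresponds to the "revised" old value that `lock cmpxchg` leaves in %rax
before the sequence jumps back to `again` (see the emission code below).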
@@ -2921,6 +2918,19 @@
       (_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst))))
     dst))
 
+(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr)
+(rule (x64_atomic_rmw_seq ty op mem input)
+      (let ((dst WritableGpr (temp_writable_gpr))
+            (tmp WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst))))
+        dst))
+
+;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the
+;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other.
+(type MachAtomicRmwOp extern (enum))
+(decl atomic_rmw_op_to_mach_atomic_rmw_op (AtomicRmwOp) MachAtomicRmwOp)
+(extern constructor atomic_rmw_op_to_mach_atomic_rmw_op atomic_rmw_op_to_mach_atomic_rmw_op)
+
 ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (convert Gpr InstOutput output_gpr)
@@ -2973,6 +2983,7 @@
 (convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
 (convert IntCC CC intcc_to_cc)
+(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op)
 
 (decl reg_to_xmm_mem (Reg) XmmMem)
 (rule (reg_to_xmm_mem r)

------------------------------------------------------------------------

@@ -306,7 +306,7 @@ impl Amode {
         }
     }
 
-    /// Add the regs mentioned by `self` to `collector`.
+    /// Add the registers mentioned by `self` to `collector`.
     pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
         &self,
         collector: &mut OperandCollector<'_, F>,
@@ -325,6 +325,25 @@ impl Amode {
         }
     }
 
+    /// Same as `get_operands`, but add the registers in the "late" phase.
+    pub(crate) fn get_operands_late<F: Fn(VReg) -> VReg>(
+        &self,
+        collector: &mut OperandCollector<'_, F>,
+    ) {
+        match self {
+            Amode::ImmReg { base, .. } => {
+                collector.reg_late_use(*base);
+            }
+            Amode::ImmRegRegShift { base, index, .. } => {
+                collector.reg_late_use(base.to_reg());
+                collector.reg_late_use(index.to_reg());
+            }
+            Amode::RipRelative { .. } => {
+                // RIP isn't involved in regalloc.
+            }
+        }
+    }
+
     pub(crate) fn get_flags(&self) -> MemFlags {
         match self {
             Amode::ImmReg { flags, .. } => *flags,
@@ -426,7 +445,7 @@ impl SyntheticAmode {
         SyntheticAmode::NominalSPOffset { simm32 }
     }
 
-    /// Add the regs mentioned by `self` to `collector`.
+    /// Add the registers mentioned by `self` to `collector`.
     pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
         &self,
         collector: &mut OperandCollector<'_, F>,
@@ -440,6 +459,20 @@ impl SyntheticAmode {
         }
     }
 
+    /// Same as `get_operands`, but add the register in the "late" phase.
+    pub(crate) fn get_operands_late<F: Fn(VReg) -> VReg>(
+        &self,
+        collector: &mut OperandCollector<'_, F>,
+    ) {
+        match self {
+            SyntheticAmode::Real(addr) => addr.get_operands_late(collector),
+            SyntheticAmode::NominalSPOffset { .. } => {
+                // Nothing to do; the base is SP and isn't involved in regalloc.
+            }
+            SyntheticAmode::ConstantOffset(_) => {}
+        }
+    }
+
     pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer<Inst>) -> Amode {
         match self {
             SyntheticAmode::Real(addr) => addr.clone(),

------------------------------------------------------------------------

@@ -2613,118 +2613,116 @@ pub(crate) fn emit(
         Inst::AtomicRmwSeq {
             ty,
             op,
-            address,
+            mem,
             operand,
             temp,
             dst_old,
         } => {
-            // FIXME: use real vregs for this seq.
-            debug_assert_eq!(*address, regs::r9());
-            debug_assert_eq!(*operand, regs::r10());
-            debug_assert_eq!(temp.to_reg(), regs::r11());
+            let operand = allocs.next(*operand);
+            let temp = allocs.next_writable(*temp);
+            let dst_old = allocs.next_writable(*dst_old);
             debug_assert_eq!(dst_old.to_reg(), regs::rax());
+            let mem = mem.finalize(state, sink).with_allocs(allocs);
 
             // Emit this:
-            //
-            // mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
+            // mov{zbq,zwq,zlq,q} (%r_address), %rax // rax = old value
             // again:
-            // movq %rax, %r11 // rax = old value, r11 = old value
-            // `op`q %r10, %r11 // rax = old value, r11 = new value
-            // lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
+            // movq %rax, %r_temp // rax = old value, r_temp = old value
+            // `op`q %r_operand, %r_temp // rax = old value, r_temp = new value
+            // lock cmpxchg{b,w,l,q} %r_temp, (%r_address) // try to store new value
             // jnz again // If this is taken, rax will have a "revised" old value
             //
-            // Operand conventions:
-            // IN: %r9 (addr), %r10 (2nd arg for `op`)
-            // OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
+            // Operand conventions: IN: %r_address, %r_operand OUT: %rax (old
+            // value), %r_temp (trashed), %rflags (trashed)
             //
-            // In the case where the operation is 'xchg', the "`op`q" instruction is instead
-            // movq %r10, %r11
-            // so that we simply write in the destination, the "2nd arg for `op`".
-            let rax = regs::rax();
-            let r9 = regs::r9();
-            let r10 = regs::r10();
-            let r11 = regs::r11();
-            let rax_w = Writable::from_reg(rax);
-            let r11_w = Writable::from_reg(r11);
-            let amode = Amode::imm_reg(0, r9);
+            // In the case where the operation is 'xchg', the "`op`q"
+            // instruction is instead: movq %r_operand, %r_temp so that we
+            // simply write in the destination, the "2nd arg for `op`".
+            //
+            // TODO: this sequence can be significantly improved (e.g., to `lock
+            // <op>`) when it is known that `dst_old` is not used later, see
+            // https://github.com/bytecodealliance/wasmtime/issues/2153.
             let again_label = sink.get_label();
 
-            // mov{zbq,zwq,zlq,q} (%r9), %rax
+            // mov{zbq,zwq,zlq,q} (%r_address), %rax
             // No need to call `add_trap` here, since the `i1` emit will do that.
-            let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend);
+            let i1 = Inst::load(*ty, mem.clone(), dst_old, ExtKind::ZeroExtend);
             i1.emit(&[], sink, info, state);
 
             // again:
             sink.bind_label(again_label);
 
-            // movq %rax, %r11
-            let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w);
+            // movq %rax, %r_temp
+            let i2 = Inst::mov_r_r(OperandSize::Size64, dst_old.to_reg(), temp);
             i2.emit(&[], sink, info, state);
 
-            let r10_rmi = RegMemImm::reg(r10);
+            let operand_rmi = RegMemImm::reg(operand);
+            use inst_common::MachAtomicRmwOp as RmwOp;
             match op {
-                inst_common::AtomicRmwOp::Xchg => {
-                    // movq %r10, %r11
-                    let i3 = Inst::mov_r_r(OperandSize::Size64, r10, r11_w);
+                RmwOp::Xchg => {
+                    // movq %r_operand, %r_temp
+                    let i3 = Inst::mov_r_r(OperandSize::Size64, operand, temp);
                     i3.emit(&[], sink, info, state);
                 }
-                inst_common::AtomicRmwOp::Nand => {
-                    // andq %r10, %r11
+                RmwOp::Nand => {
+                    // andq %r_operand, %r_temp
                     let i3 =
-                        Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, r10_rmi, r11_w);
+                        Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, operand_rmi, temp);
                     i3.emit(&[], sink, info, state);
 
-                    // notq %r11
-                    let i4 = Inst::not(OperandSize::Size64, r11_w);
+                    // notq %r_temp
+                    let i4 = Inst::not(OperandSize::Size64, temp);
                     i4.emit(&[], sink, info, state);
                 }
-                inst_common::AtomicRmwOp::Umin
-                | inst_common::AtomicRmwOp::Umax
-                | inst_common::AtomicRmwOp::Smin
-                | inst_common::AtomicRmwOp::Smax => {
-                    // cmp %r11, %r10
-                    let i3 = Inst::cmp_rmi_r(OperandSize::from_ty(*ty), RegMemImm::reg(r11), r10);
+                RmwOp::Umin | RmwOp::Umax | RmwOp::Smin | RmwOp::Smax => {
+                    // cmp %r_temp, %r_operand
+                    let i3 = Inst::cmp_rmi_r(
+                        OperandSize::from_ty(*ty),
+                        RegMemImm::reg(temp.to_reg()),
+                        operand,
+                    );
                     i3.emit(&[], sink, info, state);
 
-                    // cmovcc %r10, %r11
+                    // cmovcc %r_operand, %r_temp
                     let cc = match op {
-                        inst_common::AtomicRmwOp::Umin => CC::BE,
-                        inst_common::AtomicRmwOp::Umax => CC::NB,
-                        inst_common::AtomicRmwOp::Smin => CC::LE,
-                        inst_common::AtomicRmwOp::Smax => CC::NL,
+                        RmwOp::Umin => CC::BE,
+                        RmwOp::Umax => CC::NB,
+                        RmwOp::Smin => CC::LE,
+                        RmwOp::Smax => CC::NL,
                         _ => unreachable!(),
                     };
-                    let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(r10), r11_w);
+                    let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(operand), temp);
                     i4.emit(&[], sink, info, state);
                 }
                 _ => {
-                    // opq %r10, %r11
+                    // opq %r_operand, %r_temp
                    let alu_op = match op {
-                        inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
-                        inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
-                        inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
-                        inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
-                        inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
-                        inst_common::AtomicRmwOp::Xchg
-                        | inst_common::AtomicRmwOp::Nand
-                        | inst_common::AtomicRmwOp::Umin
-                        | inst_common::AtomicRmwOp::Umax
-                        | inst_common::AtomicRmwOp::Smin
-                        | inst_common::AtomicRmwOp::Smax => unreachable!(),
+                        RmwOp::Add => AluRmiROpcode::Add,
+                        RmwOp::Sub => AluRmiROpcode::Sub,
+                        RmwOp::And => AluRmiROpcode::And,
+                        RmwOp::Or => AluRmiROpcode::Or,
+                        RmwOp::Xor => AluRmiROpcode::Xor,
+                        RmwOp::Xchg
+                        | RmwOp::Nand
+                        | RmwOp::Umin
+                        | RmwOp::Umax
+                        | RmwOp::Smin
+                        | RmwOp::Smax => unreachable!(),
                     };
-                    let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w);
+                    let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, operand_rmi, temp);
                     i3.emit(&[], sink, info, state);
                 }
             }
 
-            // lock cmpxchg{b,w,l,q} %r11, (%r9)
+            // lock cmpxchg{b,w,l,q} %r_temp, (%r_address)
             // No need to call `add_trap` here, since the `i4` emit will do that.
             let i4 = Inst::LockCmpxchg {
                 ty: *ty,
-                replacement: r11,
-                expected: regs::rax(),
-                mem: amode.into(),
-                dst_old: Writable::from_reg(regs::rax()),
+                replacement: temp.to_reg(),
+                expected: dst_old.to_reg(),
+                mem: mem.into(),
+                dst_old,
             };
             i4.emit(&[], sink, info, state);
------------------------------------------------------------------------

@@ -4611,6 +4611,8 @@ fn test_x64_emit() {
         3,
     )
     .into();
+    // Use `r9` with a 0 offset.
+    let am3: SyntheticAmode = Amode::imm_reg(0, r9).into();
 
     // A general 8-bit case.
     insns.push((
@@ -4743,8 +4745,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I8,
-            op: inst_common::AtomicRmwOp::Or,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Or,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4755,8 +4757,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I16,
-            op: inst_common::AtomicRmwOp::And,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::And,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4767,8 +4769,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I32,
-            op: inst_common::AtomicRmwOp::Xchg,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Xchg,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4779,8 +4781,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I32,
-            op: inst_common::AtomicRmwOp::Umin,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Umin,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4791,8 +4793,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I64,
-            op: inst_common::AtomicRmwOp::Add,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Add,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax

------------------------------------------------------------------------

@@ -2052,13 +2052,19 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             mem.get_operands(collector);
         }
 
-        Inst::AtomicRmwSeq { .. } => {
-            // FIXME: take vreg args, not fixed regs, and just use
-            // reg_fixed_use here.
-            collector.reg_use(regs::r9());
-            collector.reg_use(regs::r10());
-            collector.reg_def(Writable::from_reg(regs::r11()));
-            collector.reg_def(Writable::from_reg(regs::rax()));
+        Inst::AtomicRmwSeq {
+            operand,
+            temp,
+            dst_old,
+            mem,
+            ..
+        } => {
+            collector.reg_late_use(*operand);
+            collector.reg_early_def(*temp);
+            // This `fixed_def` is needed because `CMPXCHG` always uses this
+            // register implicitly.
+            collector.reg_fixed_def(*dst_old, regs::rax());
+            mem.get_operands_late(collector)
         }
 
         Inst::Ret { rets } => {

------------------------------------------------------------------------

@@ -2851,3 +2851,19 @@
 (rule (lower (has_type (and (fits_in_64 ty) (ty_int _))
                        (atomic_cas flags address expected replacement)))
       (x64_cmpxchg ty expected replacement (to_amode flags address (zero_offset))))
+
+;; Rules for `atomic_rmw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; This is a simple, general-case atomic update, based on a loop involving
+;; `cmpxchg`. Note that we could do much better than this in the case where the
+;; old value at the location (that is to say, the SSA `Value` computed by this
+;; CLIF instruction) is not required. In that case, we could instead implement
+;; this using a single `lock`-prefixed x64 read-modify-write instruction. Also,
+;; even in the case where the old value is required, for the `add` and `sub`
+;; cases, we can use the single instruction `lock xadd`. However, those
+;; improvements have been left for another day. TODO: filed as
+;; https://github.com/bytecodealliance/wasmtime/issues/2153.
+(rule (lower (has_type (and (fits_in_64 ty) (ty_int _))
+                       (atomic_rmw flags op address input)))
+      (x64_atomic_rmw_seq ty op (to_amode flags address (zero_offset)) input))
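
The `lock xadd` improvement mentioned in the comment above is worth spelling
out: unlike the CAS loop, it performs the add and returns the old value in one
atomic instruction. A minimal Rust sketch, assuming the operation is `add`
(the function name `rmw_add` is illustrative; on x86-64, `fetch_add` does
compile to exactly such a `lock xadd`):

    use std::sync::atomic::{AtomicU64, Ordering};

    // `lock xadd` atomically adds and hands back the previous value, so no
    // compare-and-swap retry loop is needed for `add` (or, by negating the
    // operand, for `sub`).
    fn rmw_add(mem: &AtomicU64, operand: u64) -> u64 {
        mem.fetch_add(operand, Ordering::SeqCst) // one `lock xadd`
    }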

------------------------------------------------------------------------

@@ -44,14 +44,6 @@ fn is_bool_ty(ty: Type) -> bool {
     }
 }
 
-/// This is target-word-size dependent. And it excludes booleans and reftypes.
-fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
-    match ty {
-        types::I8 | types::I16 | types::I32 | types::I64 => true,
-        _ => false,
-    }
-}
-
 /// Returns whether the given specified `input` is a result produced by an instruction with Opcode
 /// `op`.
 // TODO investigate failures with checking against the result index.
@@ -2136,54 +2128,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::AtomicRmw => {
-            // This is a simple, general-case atomic update, based on a loop involving
-            // `cmpxchg`. Note that we could do much better than this in the case where the old
-            // value at the location (that is to say, the SSA `Value` computed by this CLIF
-            // instruction) is not required. In that case, we could instead implement this
-            // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in
-            // the case where the old value is required, for the `add` and `sub` cases, we can
-            // use the single instruction `lock xadd`. However, those improvements have been
-            // left for another day.
-            // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let mut addr = put_input_in_reg(ctx, inputs[0]);
-            let mut arg2 = put_input_in_reg(ctx, inputs[1]);
-            let ty_access = ty.unwrap();
-            assert!(is_valid_atomic_transaction_ty(ty_access));
-
-            // Make sure that both args are in virtual regs, since in effect we have to do a
-            // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
-            // guaranteed safe if either is in a real reg.
-            addr = ctx.ensure_in_vreg(addr, types::I64);
-            arg2 = ctx.ensure_in_vreg(arg2, types::I64);
-
-            // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
-            // operates at whatever width is specified by `ty`, so there's no need to
-            // zero-extend `arg2` in the case of `ty` being I8/I16/I32.
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::r9()),
-                addr,
-                types::I64,
-            ));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::r10()),
-                arg2,
-                types::I64,
-            ));
-
-            // Now the AtomicRmwSeq (pseudo-) instruction itself
-            let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
-            ctx.emit(Inst::AtomicRmwSeq {
-                ty: ty_access,
-                op,
-                address: regs::r9(),
-                operand: regs::r10(),
-                temp: Writable::from_reg(regs::r11()),
-                dst_old: Writable::from_reg(regs::rax()),
-            });
-
-            // And finally, copy the preordained AtomicRmwSeq output reg to its destination.
-            ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+            implemented_in_isle(ctx);
         }
 
         Opcode::AtomicCas => {

------------------------------------------------------------------------

@@ -2,7 +2,10 @@
 
 // Pull in the ISLE generated code.
 pub(crate) mod generated_code;
-use crate::machinst::{InputSourceInst, Reg, Writable};
+use crate::{
+    ir::AtomicRmwOp,
+    machinst::{InputSourceInst, Reg, Writable},
+};
 use generated_code::MInst;
 
 // Types that the generated ISLE code uses via `use super::*`.
@@ -23,7 +26,7 @@ use crate::{
         },
     },
     machinst::{
-        isle::*, AtomicRmwOp, InsnInput, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData,
+        isle::*, InsnInput, InsnOutput, LowerCtx, MachAtomicRmwOp, VCodeConstant, VCodeConstantData,
     },
 };
 use std::boxed::Box;
@@ -565,6 +568,11 @@ where
     fn zero_offset(&mut self) -> Offset32 {
         Offset32::new(0)
     }
+
+    #[inline]
+    fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp {
+        MachAtomicRmwOp::from(*op)
+    }
 }
 
 // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we

------------------------------------------------------------------------

@@ -45,11 +45,10 @@ pub(crate) fn insn_outputs<I: VCodeInst, C: LowerCtx<I = I>>(
 //============================================================================
 // Atomic instructions.
 
-/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64
-/// targets.
+/// Atomic memory update operations.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[repr(u8)]
-pub enum AtomicRmwOp {
+pub enum MachAtomicRmwOp {
     /// Add
     Add,
     /// Sub
@@ -74,21 +73,22 @@ pub enum AtomicRmwOp {
     Smax,
 }
 
-impl AtomicRmwOp {
-    /// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`.
+impl MachAtomicRmwOp {
+    /// Converts an `ir::AtomicRmwOp` to the corresponding
+    /// `inst_common::AtomicRmwOp`.
     pub fn from(ir_op: ir::AtomicRmwOp) -> Self {
         match ir_op {
-            ir::AtomicRmwOp::Add => AtomicRmwOp::Add,
-            ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub,
-            ir::AtomicRmwOp::And => AtomicRmwOp::And,
-            ir::AtomicRmwOp::Nand => AtomicRmwOp::Nand,
-            ir::AtomicRmwOp::Or => AtomicRmwOp::Or,
-            ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor,
-            ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg,
-            ir::AtomicRmwOp::Umin => AtomicRmwOp::Umin,
-            ir::AtomicRmwOp::Umax => AtomicRmwOp::Umax,
-            ir::AtomicRmwOp::Smin => AtomicRmwOp::Smin,
-            ir::AtomicRmwOp::Smax => AtomicRmwOp::Smax,
+            ir::AtomicRmwOp::Add => MachAtomicRmwOp::Add,
+            ir::AtomicRmwOp::Sub => MachAtomicRmwOp::Sub,
+            ir::AtomicRmwOp::And => MachAtomicRmwOp::And,
+            ir::AtomicRmwOp::Nand => MachAtomicRmwOp::Nand,
+            ir::AtomicRmwOp::Or => MachAtomicRmwOp::Or,
+            ir::AtomicRmwOp::Xor => MachAtomicRmwOp::Xor,
+            ir::AtomicRmwOp::Xchg => MachAtomicRmwOp::Xchg,
+            ir::AtomicRmwOp::Umin => MachAtomicRmwOp::Umin,
+            ir::AtomicRmwOp::Umax => MachAtomicRmwOp::Umax,
+            ir::AtomicRmwOp::Smin => MachAtomicRmwOp::Smin,
+            ir::AtomicRmwOp::Smax => MachAtomicRmwOp::Smax,
         }
     }
 }

------------------------------------------------------------------------

@@ -328,6 +328,11 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
         self.add_operand(Operand::reg_use(reg.into()));
     }
 
+    /// Add a register use, at the end of the instruction (`After` position).
+    pub fn reg_late_use(&mut self, reg: Reg) {
+        self.add_operand(Operand::reg_use_at_end(reg.into()));
+    }
+
     /// Add multiple register uses.
     pub fn reg_uses(&mut self, regs: &[Reg]) {
         for &reg in regs {