diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index aaede3ab56..0df0f53c5b 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -415,10 +415,10 @@
     ;; The sequence consists of an initial "normal" load from `dst`, followed
     ;; by a loop which computes the new value and tries to compare-and-swap
     ;; ("CAS") it into `dst`, using the native instruction `lock
-    ;; cmpxchg{b,w,l,q}` . The loop iterates until the CAS is successful.
-    ;; If there is no contention, there will be only one pass through the
-    ;; loop body. The sequence does *not* perform any explicit memory fence
-    ;; instructions (mfence/sfence/lfence).
+    ;; cmpxchg{b,w,l,q}`. The loop iterates until the CAS is successful. If
+    ;; there is no contention, there will be only one pass through the loop
+    ;; body. The sequence does *not* perform any explicit memory fence
+    ;; instructions (`mfence`/`sfence`/`lfence`).
     ;;
     ;; Note that the transaction is atomic in the sense that, as observed by
     ;; some other thread, `dst` either has the initial or final value, but no
@@ -430,15 +430,12 @@
     ;; problem.
     ;;
     ;; This instruction sequence has fixed register uses as follows:
-    ;;
-    ;; %r9 (read) address
-    ;; %r10 (read) second operand for `op`
-    ;; %r11 (written) scratch reg; value afterwards has no meaning
-    ;; %rax (written) the old value at %r9
-    ;; %rflags is written. Do not assume anything about it after the instruction.
+    ;; - %rax (written) the old value at `mem`
+    ;; - %rflags is written. Do not assume anything about it after the
+    ;;   instruction.
     (AtomicRmwSeq (ty Type) ;; I8, I16, I32, or I64
-                  (op AtomicRmwOp)
-                  (address Reg)
+                  (op MachAtomicRmwOp)
+                  (mem SyntheticAmode)
                   (operand Reg)
                   (temp WritableReg)
                   (dst_old WritableReg))
@@ -2921,6 +2918,19 @@
       (_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst))))
     dst))
 
+(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr)
+(rule (x64_atomic_rmw_seq ty op mem input)
+  (let ((dst WritableGpr (temp_writable_gpr))
+        (tmp WritableGpr (temp_writable_gpr))
+        (_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst))))
+    dst))
+
+;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the
+;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other.
+(type MachAtomicRmwOp extern (enum))
+(decl atomic_rmw_op_to_mach_atomic_rmw_op (AtomicRmwOp) MachAtomicRmwOp)
+(extern constructor atomic_rmw_op_to_mach_atomic_rmw_op atomic_rmw_op_to_mach_atomic_rmw_op)
+
 ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (convert Gpr InstOutput output_gpr)
@@ -2973,6 +2983,7 @@
 
 (convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
 (convert IntCC CC intcc_to_cc)
+(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op)
 
 (decl reg_to_xmm_mem (Reg) XmmMem)
 (rule (reg_to_xmm_mem r)
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index 08f2331afd..6d5e29b999 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -306,7 +306,7 @@ impl Amode {
         }
     }
 
-    /// Add the regs mentioned by `self` to `collector`.
+    /// Add the registers mentioned by `self` to `collector`.
     pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
         &self,
         collector: &mut OperandCollector<'_, F>,
@@ -325,6 +325,25 @@ impl Amode {
         }
     }
 
+    /// Same as `get_operands`, but add the registers in the "late" phase.
+    pub(crate) fn get_operands_late<F: Fn(VReg) -> VReg>(
+        &self,
+        collector: &mut OperandCollector<'_, F>,
+    ) {
+        match self {
+            Amode::ImmReg { base, .. } => {
+                collector.reg_late_use(*base);
+            }
+            Amode::ImmRegRegShift { base, index, .. } => {
+                collector.reg_late_use(base.to_reg());
+                collector.reg_late_use(index.to_reg());
+            }
+            Amode::RipRelative { .. } => {
+                // RIP isn't involved in regalloc.
+            }
+        }
+    }
+
     pub(crate) fn get_flags(&self) -> MemFlags {
         match self {
             Amode::ImmReg { flags, .. } => *flags,
@@ -426,7 +445,7 @@ impl SyntheticAmode {
         }
     }
 
-    /// Add the regs mentioned by `self` to `collector`.
+    /// Add the registers mentioned by `self` to `collector`.
     pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
         &self,
         collector: &mut OperandCollector<'_, F>,
@@ -440,6 +459,20 @@ impl SyntheticAmode {
         }
     }
 
+    /// Same as `get_operands`, but add the register in the "late" phase.
+    pub(crate) fn get_operands_late<F: Fn(VReg) -> VReg>(
+        &self,
+        collector: &mut OperandCollector<'_, F>,
+    ) {
+        match self {
+            SyntheticAmode::Real(addr) => addr.get_operands_late(collector),
+            SyntheticAmode::NominalSPOffset { .. } => {
+                // Nothing to do; the base is SP and isn't involved in regalloc.
+            }
+            SyntheticAmode::ConstantOffset(_) => {}
+        }
+    }
+
     pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer<Inst>) -> Amode {
         match self {
             SyntheticAmode::Real(addr) => addr.clone(),
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 3002f8dd67..9e9a66f881 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -2613,118 +2613,116 @@ pub(crate) fn emit(
         Inst::AtomicRmwSeq {
             ty,
             op,
-            address,
+            mem,
             operand,
             temp,
             dst_old,
         } => {
-            // FIXME: use real vregs for this seq.
-            debug_assert_eq!(*address, regs::r9());
-            debug_assert_eq!(*operand, regs::r10());
-            debug_assert_eq!(temp.to_reg(), regs::r11());
+            let operand = allocs.next(*operand);
+            let temp = allocs.next_writable(*temp);
+            let dst_old = allocs.next_writable(*dst_old);
             debug_assert_eq!(dst_old.to_reg(), regs::rax());
+            let mem = mem.finalize(state, sink).with_allocs(allocs);
 
             // Emit this:
-            //
-            //   mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
-            //  again:
-            //   movq %rax, %r11 // rax = old value, r11 = old value
-            //   `op`q %r10, %r11 // rax = old value, r11 = new value
-            //   lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
+            //   mov{zbq,zwq,zlq,q} (%r_address), %rax // rax = old value
+            //  again:
+            //   movq %rax, %r_temp // rax = old value, r_temp = old value
+            //   `op`q %r_operand, %r_temp // rax = old value, r_temp = new value
+            //   lock cmpxchg{b,w,l,q} %r_temp, (%r_address) // try to store new value
             //   jnz again // If this is taken, rax will have a "revised" old value
             //
-            // Operand conventions:
-            //   IN: %r9 (addr), %r10 (2nd arg for `op`)
-            //   OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
+            // Operand conventions: IN: %r_address, %r_operand OUT: %rax (old
+            // value), %r_temp (trashed), %rflags (trashed)
             //
-            // In the case where the operation is 'xchg', the "`op`q" instruction is instead
-            //   movq %r10, %r11
-            // so that we simply write in the destination, the "2nd arg for `op`".
-            let rax = regs::rax();
-            let r9 = regs::r9();
-            let r10 = regs::r10();
-            let r11 = regs::r11();
-            let rax_w = Writable::from_reg(rax);
-            let r11_w = Writable::from_reg(r11);
-            let amode = Amode::imm_reg(0, r9);
+            // In the case where the operation is 'xchg', the "`op`q"
+            // instruction is instead: movq %r_operand,
+            // %r_temp so that we simply write in the destination, the "2nd
+            // arg for `op`".
+            //
+            // TODO: this sequence can be significantly improved (e.g., to `lock
+            // <op>`) when it is known that `dst_old` is not used later, see
+            // https://github.com/bytecodealliance/wasmtime/issues/2153.
             let again_label = sink.get_label();
 
-            // mov{zbq,zwq,zlq,q} (%r9), %rax
+            // mov{zbq,zwq,zlq,q} (%r_address), %rax
             // No need to call `add_trap` here, since the `i1` emit will do that.
-            let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend);
+            let i1 = Inst::load(*ty, mem.clone(), dst_old, ExtKind::ZeroExtend);
             i1.emit(&[], sink, info, state);
 
             // again:
             sink.bind_label(again_label);
 
-            // movq %rax, %r11
-            let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w);
+            // movq %rax, %r_temp
+            let i2 = Inst::mov_r_r(OperandSize::Size64, dst_old.to_reg(), temp);
             i2.emit(&[], sink, info, state);
 
-            let r10_rmi = RegMemImm::reg(r10);
+            let operand_rmi = RegMemImm::reg(operand);
+            use inst_common::MachAtomicRmwOp as RmwOp;
             match op {
-                inst_common::AtomicRmwOp::Xchg => {
-                    // movq %r10, %r11
-                    let i3 = Inst::mov_r_r(OperandSize::Size64, r10, r11_w);
+                RmwOp::Xchg => {
+                    // movq %r_operand, %r_temp
+                    let i3 = Inst::mov_r_r(OperandSize::Size64, operand, temp);
                     i3.emit(&[], sink, info, state);
                 }
-                inst_common::AtomicRmwOp::Nand => {
-                    // andq %r10, %r11
+                RmwOp::Nand => {
+                    // andq %r_operand, %r_temp
                     let i3 =
-                        Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, r10_rmi, r11_w);
+                        Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, operand_rmi, temp);
                     i3.emit(&[], sink, info, state);
 
-                    // notq %r11
-                    let i4 = Inst::not(OperandSize::Size64, r11_w);
+                    // notq %r_temp
+                    let i4 = Inst::not(OperandSize::Size64, temp);
                     i4.emit(&[], sink, info, state);
                 }
-                inst_common::AtomicRmwOp::Umin
-                | inst_common::AtomicRmwOp::Umax
-                | inst_common::AtomicRmwOp::Smin
-                | inst_common::AtomicRmwOp::Smax => {
-                    // cmp %r11, %r10
-                    let i3 = Inst::cmp_rmi_r(OperandSize::from_ty(*ty), RegMemImm::reg(r11), r10);
+                RmwOp::Umin | RmwOp::Umax | RmwOp::Smin | RmwOp::Smax => {
+                    // cmp %r_temp, %r_operand
+                    let i3 = Inst::cmp_rmi_r(
+                        OperandSize::from_ty(*ty),
+                        RegMemImm::reg(temp.to_reg()),
+                        operand,
+                    );
                     i3.emit(&[], sink, info, state);
 
-                    // cmovcc %r10, %r11
+                    // cmovcc %r_operand, %r_temp
                     let cc = match op {
-                        inst_common::AtomicRmwOp::Umin => CC::BE,
-                        inst_common::AtomicRmwOp::Umax => CC::NB,
-                        inst_common::AtomicRmwOp::Smin => CC::LE,
-                        inst_common::AtomicRmwOp::Smax => CC::NL,
+                        RmwOp::Umin => CC::BE,
+                        RmwOp::Umax => CC::NB,
+                        RmwOp::Smin => CC::LE,
+                        RmwOp::Smax => CC::NL,
                         _ => unreachable!(),
                     };
-                    let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(r10), r11_w);
+                    let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(operand), temp);
                     i4.emit(&[], sink, info, state);
                 }
                 _ => {
-                    // opq %r10, %r11
+                    // opq %r_operand, %r_temp
                     let alu_op = match op {
-                        inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
-                        inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
-                        inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
-                        inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
-                        inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
-                        inst_common::AtomicRmwOp::Xchg
-                        | inst_common::AtomicRmwOp::Nand
-                        | inst_common::AtomicRmwOp::Umin
-                        | inst_common::AtomicRmwOp::Umax
-                        | inst_common::AtomicRmwOp::Smin
-                        | inst_common::AtomicRmwOp::Smax => unreachable!(),
+                        RmwOp::Add => AluRmiROpcode::Add,
+                        RmwOp::Sub => AluRmiROpcode::Sub,
+                        RmwOp::And => AluRmiROpcode::And,
+                        RmwOp::Or => AluRmiROpcode::Or,
+                        RmwOp::Xor => AluRmiROpcode::Xor,
+                        RmwOp::Xchg
+                        | RmwOp::Nand
+                        | RmwOp::Umin
+                        | RmwOp::Umax
+                        | RmwOp::Smin
+                        | RmwOp::Smax => unreachable!(),
                     };
-                    let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w);
+                    let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, operand_rmi, temp);
                     i3.emit(&[], sink, info, state);
                 }
             }
 
-            // lock cmpxchg{b,w,l,q} %r11, (%r9)
+            // lock cmpxchg{b,w,l,q} %r_temp, (%r_address)
             // No need to call `add_trap` here, since the `i4` emit will do that.
             let i4 = Inst::LockCmpxchg {
                 ty: *ty,
-                replacement: r11,
-                expected: regs::rax(),
-                mem: amode.into(),
-                dst_old: Writable::from_reg(regs::rax()),
+                replacement: temp.to_reg(),
+                expected: dst_old.to_reg(),
+                mem: mem.into(),
+                dst_old,
             };
             i4.emit(&[], sink, info, state);
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 06166a55bd..4bcf936f14 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -4611,6 +4611,8 @@ fn test_x64_emit() {
         3,
     )
     .into();
+    // Use `r9` with a 0 offset.
+    let am3: SyntheticAmode = Amode::imm_reg(0, r9).into();
 
     // A general 8-bit case.
     insns.push((
@@ -4743,8 +4745,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I8,
-            op: inst_common::AtomicRmwOp::Or,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Or,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4755,8 +4757,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I16,
-            op: inst_common::AtomicRmwOp::And,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::And,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4767,8 +4769,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I32,
-            op: inst_common::AtomicRmwOp::Xchg,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Xchg,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4779,8 +4781,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I32,
-            op: inst_common::AtomicRmwOp::Umin,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Umin,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
@@ -4791,8 +4793,8 @@
     insns.push((
         Inst::AtomicRmwSeq {
             ty: types::I64,
-            op: inst_common::AtomicRmwOp::Add,
-            address: r9,
+            op: inst_common::MachAtomicRmwOp::Add,
+            mem: am3.clone(),
             operand: r10,
             temp: w_r11,
             dst_old: w_rax
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index f6250859a2..a9d0a79146 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -2052,13 +2052,19 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             mem.get_operands(collector);
         }
 
-        Inst::AtomicRmwSeq { .. } => {
-            // FIXME: take vreg args, not fixed regs, and just use
-            // reg_fixed_use here.
-            collector.reg_use(regs::r9());
-            collector.reg_use(regs::r10());
-            collector.reg_def(Writable::from_reg(regs::r11()));
-            collector.reg_def(Writable::from_reg(regs::rax()));
+        Inst::AtomicRmwSeq {
+            operand,
+            temp,
+            dst_old,
+            mem,
+            ..
+        } => {
+            collector.reg_late_use(*operand);
+            collector.reg_early_def(*temp);
+            // This `fixed_def` is needed because `CMPXCHG` always uses this
+            // register implicitly.
+            collector.reg_fixed_def(*dst_old, regs::rax());
+            mem.get_operands_late(collector)
         }
 
         Inst::Ret { rets } => {
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index ed1cfe5579..848794f85e 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2851,3 +2851,19 @@
 (rule (lower (has_type (and (fits_in_64 ty) (ty_int _))
                        (atomic_cas flags address expected replacement)))
       (x64_cmpxchg ty expected replacement (to_amode flags address (zero_offset))))
+
+;; Rules for `atomic_rmw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; This is a simple, general-case atomic update, based on a loop involving
+;; `cmpxchg`. Note that we could do much better than this in the case where the
+;; old value at the location (that is to say, the SSA `Value` computed by this
+;; CLIF instruction) is not required. In that case, we could instead implement
+;; this using a single `lock`-prefixed x64 read-modify-write instruction. Also,
+;; even in the case where the old value is required, for the `add` and `sub`
+;; cases, we can use the single instruction `lock xadd`. However, those
+;; improvements have been left for another day. TODO: filed as
+;; https://github.com/bytecodealliance/wasmtime/issues/2153.
+
+(rule (lower (has_type (and (fits_in_64 ty) (ty_int _))
+                       (atomic_rmw flags op address input)))
+      (x64_atomic_rmw_seq ty op (to_amode flags address (zero_offset)) input))
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 98b56bb6ca..2c4641296d 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -44,14 +44,6 @@ fn is_bool_ty(ty: Type) -> bool {
     }
 }
 
-/// This is target-word-size dependent. And it excludes booleans and reftypes.
-fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
-    match ty {
-        types::I8 | types::I16 | types::I32 | types::I64 => true,
-        _ => false,
-    }
-}
-
 /// Returns whether the given specified `input` is a result produced by an instruction with Opcode
 /// `op`.
 // TODO investigate failures with checking against the result index.
@@ -2136,54 +2128,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::AtomicRmw => {
-            // This is a simple, general-case atomic update, based on a loop involving
-            // `cmpxchg`. Note that we could do much better than this in the case where the old
-            // value at the location (that is to say, the SSA `Value` computed by this CLIF
-            // instruction) is not required. In that case, we could instead implement this
-            // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in
-            // the case where the old value is required, for the `add` and `sub` cases, we can
-            // use the single instruction `lock xadd`. However, those improvements have been
-            // left for another day.
-            // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let mut addr = put_input_in_reg(ctx, inputs[0]);
-            let mut arg2 = put_input_in_reg(ctx, inputs[1]);
-            let ty_access = ty.unwrap();
-            assert!(is_valid_atomic_transaction_ty(ty_access));
-
-            // Make sure that both args are in virtual regs, since in effect we have to do a
-            // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
-            // guaranteed safe if either is in a real reg.
-            addr = ctx.ensure_in_vreg(addr, types::I64);
-            arg2 = ctx.ensure_in_vreg(arg2, types::I64);
-
-            // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
-            // operates at whatever width is specified by `ty`, so there's no need to
-            // zero-extend `arg2` in the case of `ty` being I8/I16/I32.
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::r9()),
-                addr,
-                types::I64,
-            ));
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::r10()),
-                arg2,
-                types::I64,
-            ));
-
-            // Now the AtomicRmwSeq (pseudo-) instruction itself
-            let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
-            ctx.emit(Inst::AtomicRmwSeq {
-                ty: ty_access,
-                op,
-                address: regs::r9(),
-                operand: regs::r10(),
-                temp: Writable::from_reg(regs::r11()),
-                dst_old: Writable::from_reg(regs::rax()),
-            });
-
-            // And finally, copy the preordained AtomicRmwSeq output reg to its destination.
-            ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+            implemented_in_isle(ctx);
         }
 
         Opcode::AtomicCas => {
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index b8a71206a6..9b068b1eba 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -2,7 +2,10 @@
 // Pull in the ISLE generated code.
 pub(crate) mod generated_code;
 
-use crate::machinst::{InputSourceInst, Reg, Writable};
+use crate::{
+    ir::AtomicRmwOp,
+    machinst::{InputSourceInst, Reg, Writable},
+};
 use generated_code::MInst;
 
 // Types that the generated ISLE code uses via `use super::*`.
@@ -23,7 +26,7 @@ use crate::{
         },
     },
     machinst::{
-        isle::*, AtomicRmwOp, InsnInput, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData,
+        isle::*, InsnInput, InsnOutput, LowerCtx, MachAtomicRmwOp, VCodeConstant, VCodeConstantData,
     },
 };
 use std::boxed::Box;
@@ -565,6 +568,11 @@
     fn zero_offset(&mut self) -> Offset32 {
         Offset32::new(0)
     }
+
+    #[inline]
+    fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp {
+        MachAtomicRmwOp::from(*op)
+    }
 }
 
 // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs
index a4fb41ec76..740a0346cc 100644
--- a/cranelift/codegen/src/machinst/inst_common.rs
+++ b/cranelift/codegen/src/machinst/inst_common.rs
@@ -45,11 +45,10 @@ pub(crate) fn insn_outputs<I: VCodeInst, C: LowerCtx<I = I>>(
 //============================================================================
 // Atomic instructions.
 
-/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64
-/// targets.
+/// Atomic memory update operations.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[repr(u8)]
-pub enum AtomicRmwOp {
+pub enum MachAtomicRmwOp {
     /// Add
     Add,
     /// Sub
     Sub,
     /// And
     And,
     /// Nand
     Nand,
     /// Or
     Or,
     /// Xor
     Xor,
     /// Exchange
     Xchg,
     /// Unsigned min
     Umin,
     /// Unsigned max
     Umax,
     /// Signed min
     Smin,
     /// Signed max
     Smax,
 }
 
-impl AtomicRmwOp {
-    /// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`.
+impl MachAtomicRmwOp {
+    /// Converts an `ir::AtomicRmwOp` to the corresponding
+    /// `inst_common::MachAtomicRmwOp`.
     pub fn from(ir_op: ir::AtomicRmwOp) -> Self {
         match ir_op {
-            ir::AtomicRmwOp::Add => AtomicRmwOp::Add,
-            ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub,
-            ir::AtomicRmwOp::And => AtomicRmwOp::And,
-            ir::AtomicRmwOp::Nand => AtomicRmwOp::Nand,
-            ir::AtomicRmwOp::Or => AtomicRmwOp::Or,
-            ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor,
-            ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg,
-            ir::AtomicRmwOp::Umin => AtomicRmwOp::Umin,
-            ir::AtomicRmwOp::Umax => AtomicRmwOp::Umax,
-            ir::AtomicRmwOp::Smin => AtomicRmwOp::Smin,
-            ir::AtomicRmwOp::Smax => AtomicRmwOp::Smax,
+            ir::AtomicRmwOp::Add => MachAtomicRmwOp::Add,
+            ir::AtomicRmwOp::Sub => MachAtomicRmwOp::Sub,
+            ir::AtomicRmwOp::And => MachAtomicRmwOp::And,
+            ir::AtomicRmwOp::Nand => MachAtomicRmwOp::Nand,
+            ir::AtomicRmwOp::Or => MachAtomicRmwOp::Or,
+            ir::AtomicRmwOp::Xor => MachAtomicRmwOp::Xor,
+            ir::AtomicRmwOp::Xchg => MachAtomicRmwOp::Xchg,
+            ir::AtomicRmwOp::Umin => MachAtomicRmwOp::Umin,
+            ir::AtomicRmwOp::Umax => MachAtomicRmwOp::Umax,
+            ir::AtomicRmwOp::Smin => MachAtomicRmwOp::Smin,
+            ir::AtomicRmwOp::Smax => MachAtomicRmwOp::Smax,
         }
     }
 }
diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs
index 671edd59df..5c4bd494a3 100644
--- a/cranelift/codegen/src/machinst/reg.rs
+++ b/cranelift/codegen/src/machinst/reg.rs
@@ -328,6 +328,11 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
         self.add_operand(Operand::reg_use(reg.into()));
     }
 
+    /// Add a register use, at the end of the instruction (`After` position).
+    pub fn reg_late_use(&mut self, reg: Reg) {
+        self.add_operand(Operand::reg_use_at_end(reg.into()));
+    }
+
     /// Add multiple register uses.
     pub fn reg_uses(&mut self, regs: &[Reg]) {
         for &reg in regs {
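
For context on what the pseudo-instruction guarantees, the behavior this patch re-plumbs (without changing it) can be modeled in plain Rust. The sketch below is illustrative only: `RmwOp` and `atomic_rmw` are hypothetical stand-ins for `MachAtomicRmwOp` and the emitted load / compute / `lock cmpxchg` / retry loop, modeled at 64 bits on the host; none of it is code from this patch.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

/// Hypothetical stand-in for `MachAtomicRmwOp`; same variants.
#[allow(dead_code)]
#[derive(Clone, Copy)]
enum RmwOp {
    Add, Sub, And, Nand, Or, Xor, Xchg, Umin, Umax, Smin, Smax,
}

/// Applies `op` to `mem` and returns the *old* value -- the same contract
/// `AtomicRmwSeq` implements. Modeled at 64 bits only; the real
/// pseudo-instruction also handles the I8/I16/I32 widths.
fn atomic_rmw(mem: &AtomicU64, op: RmwOp, operand: u64) -> u64 {
    // Initial plain load: mov{zbq,zwq,zlq,q} (%r_address), %rax.
    let mut old = mem.load(Ordering::Relaxed);
    loop {
        // The new value computed into %r_temp before the CAS attempt.
        let new = match op {
            RmwOp::Add => old.wrapping_add(operand),
            RmwOp::Sub => old.wrapping_sub(operand),
            RmwOp::And => old & operand,
            RmwOp::Nand => !(old & operand),
            RmwOp::Or => old | operand,
            RmwOp::Xor => old ^ operand,
            RmwOp::Xchg => operand,
            RmwOp::Umin => old.min(operand),
            RmwOp::Umax => old.max(operand),
            RmwOp::Smin => (old as i64).min(operand as i64) as u64,
            RmwOp::Smax => (old as i64).max(operand as i64) as u64,
        };
        // lock cmpxchg: on failure the CPU leaves the revised old value in
        // %rax, so the loop retries with it (the `jnz again` branch).
        match mem.compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(prev) => return prev,
            Err(prev) => old = prev,
        }
    }
}

fn main() {
    let mem = AtomicU64::new(5);
    // Returns the old value (5) and leaves 8 in memory, like `dst_old`.
    assert_eq!(atomic_rmw(&mem, RmwOp::Add, 3), 5);
    assert_eq!(mem.load(Ordering::Relaxed), 8);
}
```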