diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs index 0aa4129daf..9811dcc0ca 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ -164,9 +164,14 @@ impl Immediates { atomic_rmw_op_values.insert("add", "Add"); atomic_rmw_op_values.insert("sub", "Sub"); atomic_rmw_op_values.insert("and", "And"); + atomic_rmw_op_values.insert("nand", "Nand"); atomic_rmw_op_values.insert("or", "Or"); atomic_rmw_op_values.insert("xor", "Xor"); atomic_rmw_op_values.insert("xchg", "Xchg"); + atomic_rmw_op_values.insert("umin", "Umin"); + atomic_rmw_op_values.insert("umax", "Umax"); + atomic_rmw_op_values.insert("smin", "Smin"); + atomic_rmw_op_values.insert("smax", "Smax"); new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values) .with_doc("Atomic Read-Modify-Write Ops") }, diff --git a/cranelift/codegen/src/ir/atomic_rmw_op.rs b/cranelift/codegen/src/ir/atomic_rmw_op.rs index c93756147a..e9873e55d3 100644 --- a/cranelift/codegen/src/ir/atomic_rmw_op.rs +++ b/cranelift/codegen/src/ir/atomic_rmw_op.rs @@ -14,12 +14,22 @@ pub enum AtomicRmwOp { Sub, /// And And, + /// Nand + Nand, /// Or Or, /// Xor Xor, /// Exchange Xchg, + /// Unsigned min + Umin, + /// Unsigned max + Umax, + /// Signed min + Smin, + /// Signed max + Smax, } impl Display for AtomicRmwOp { @@ -28,9 +38,14 @@ impl Display for AtomicRmwOp { AtomicRmwOp::Add => "add", AtomicRmwOp::Sub => "sub", AtomicRmwOp::And => "and", + AtomicRmwOp::Nand => "nand", AtomicRmwOp::Or => "or", AtomicRmwOp::Xor => "xor", AtomicRmwOp::Xchg => "xchg", + AtomicRmwOp::Umin => "umin", + AtomicRmwOp::Umax => "umax", + AtomicRmwOp::Smin => "smin", + AtomicRmwOp::Smax => "smax", }; f.write_str(s) } @@ -43,9 +58,14 @@ impl FromStr for AtomicRmwOp { "add" => Ok(AtomicRmwOp::Add), "sub" => Ok(AtomicRmwOp::Sub), "and" => Ok(AtomicRmwOp::And), + "nand" => Ok(AtomicRmwOp::Nand), "or" => Ok(AtomicRmwOp::Or), "xor" => Ok(AtomicRmwOp::Xor), "xchg" => Ok(AtomicRmwOp::Xchg), + "umin" => Ok(AtomicRmwOp::Umin), + "umax" => Ok(AtomicRmwOp::Umax), + "smin" => Ok(AtomicRmwOp::Smin), + "smax" => Ok(AtomicRmwOp::Smax), _ => Err(()), } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 599a8edacd..61485b75d9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1137,6 +1137,11 @@ impl MachInstEmit for Inst { inst_common::AtomicRmwOp::And => 0b100_01010_00_0, inst_common::AtomicRmwOp::Or => 0b101_01010_00_0, inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0, + inst_common::AtomicRmwOp::Nand + | inst_common::AtomicRmwOp::Umin + | inst_common::AtomicRmwOp::Umax + | inst_common::AtomicRmwOp::Smin + | inst_common::AtomicRmwOp::Smax => todo!("{:?}", op), inst_common::AtomicRmwOp::Xchg => unreachable!(), }; sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26)); diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 36ca96a4a6..51775e0a23 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2817,22 +2817,61 @@ pub(crate) fn emit( let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w); i2.emit(sink, info, state); - // opq %r10, %r11 let r10_rmi = RegMemImm::reg(r10); - let i3 = if *op == inst_common::AtomicRmwOp::Xchg { - Inst::mov_r_r(OperandSize::Size64, r10, r11_w) - } else { - let alu_op = match op { - inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add, - inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub, - inst_common::AtomicRmwOp::And => AluRmiROpcode::And, - inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or, - inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor, - inst_common::AtomicRmwOp::Xchg => unreachable!(), - }; - Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w) - }; - i3.emit(sink, info, state); + match op { + inst_common::AtomicRmwOp::Xchg => { + // movq %r10, %r11 + let i3 = Inst::mov_r_r(OperandSize::Size64, r10, r11_w); + i3.emit(sink, info, state); + } + inst_common::AtomicRmwOp::Nand => { + // andq %r10, %r11 + let i3 = + Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, r10_rmi, r11_w); + i3.emit(sink, info, state); + + // notq %r11 + let i4 = Inst::not(OperandSize::Size64, r11_w); + i4.emit(sink, info, state); + } + inst_common::AtomicRmwOp::Umin + | inst_common::AtomicRmwOp::Umax + | inst_common::AtomicRmwOp::Smin + | inst_common::AtomicRmwOp::Smax => { + // cmp %r11, %r10 + let i3 = Inst::cmp_rmi_r(OperandSize::from_ty(*ty), RegMemImm::reg(r11), r10); + i3.emit(sink, info, state); + + // cmovcc %r10, %r11 + let cc = match op { + inst_common::AtomicRmwOp::Umin => CC::BE, + inst_common::AtomicRmwOp::Umax => CC::NB, + inst_common::AtomicRmwOp::Smin => CC::LE, + inst_common::AtomicRmwOp::Smax => CC::NL, + _ => unreachable!(), + }; + let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(r10), r11_w); + i4.emit(sink, info, state); + } + _ => { + // opq %r10, %r11 + let alu_op = match op { + inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add, + inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub, + inst_common::AtomicRmwOp::And => AluRmiROpcode::And, + inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or, + inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor, + inst_common::AtomicRmwOp::Xchg + | inst_common::AtomicRmwOp::Nand + | inst_common::AtomicRmwOp::Umin + | inst_common::AtomicRmwOp::Umax + | inst_common::AtomicRmwOp::Smin + | inst_common::AtomicRmwOp::Smax => unreachable!(), + }; + let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w); + i3.emit(sink, info, state); + } + } // lock cmpxchg{b,w,l,q} %r11, (%r9) // No need to call `add_trap` here, since the `i4` emit will do that. diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index b3394f096c..3178b1dd11 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4185,6 +4185,11 @@ fn test_x64_emit() { "418B014989C34D89D3F0450FB1190F85EFFFFFFF", "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Umin, }, + "418B014989C34539DA4D0F46DAF0450FB1190F85EBFFFFFF", + "atomically { 32_bits_at_[%r9]) Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); insns.push(( Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, }, "498B014989C34D01D3F04D0FB1190F85EFFFFFFF", diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs index ff2c2ae737..a4fb41ec76 100644 --- a/cranelift/codegen/src/machinst/inst_common.rs +++ b/cranelift/codegen/src/machinst/inst_common.rs @@ -56,12 +56,22 @@ pub enum AtomicRmwOp { Sub, /// And And, + /// Nand + Nand, /// Or Or, /// Exclusive Or Xor, /// Exchange (swap operands) Xchg, + /// Unsigned min + Umin, + /// Unsigned max + Umax, + /// Signed min + Smin, + /// Signed max + Smax, } impl AtomicRmwOp { @@ -71,9 +81,14 @@ impl AtomicRmwOp { ir::AtomicRmwOp::Add => AtomicRmwOp::Add, ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub, ir::AtomicRmwOp::And => AtomicRmwOp::And, + ir::AtomicRmwOp::Nand => AtomicRmwOp::Nand, ir::AtomicRmwOp::Or => AtomicRmwOp::Or, ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor, ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg, + ir::AtomicRmwOp::Umin => AtomicRmwOp::Umin, + ir::AtomicRmwOp::Umax => AtomicRmwOp::Umax, + ir::AtomicRmwOp::Smin => AtomicRmwOp::Smin, + ir::AtomicRmwOp::Smax => AtomicRmwOp::Smax, } } }