From 20198d94c63bc431e3ee43ce3bc948184e0f9f26 Mon Sep 17 00:00:00 2001
From: yuyang <96557710+yuyang-ok@users.noreply.github.com>
Date: Mon, 6 Mar 2023 19:27:46 +0800
Subject: [PATCH] Codegen fix atomic_rmw_loop missing move result to `dst` register On riscv64. (#5898)

* fix issue5884.

* fix issue5884

* fix test failure

* fix atomic rmw missing move result to dst register.

* specify little endian some s390x can pass test.
---
Reviewer notes (commentary only; everything outside this note is the original
patch text with its line structure restored -- indentation was reconstructed
and should be checked against the upstream commit before applying):

* emit.rs, And/Or/Xor hunk: `AtomicOP::extract` is now called with `dst`
  instead of `t0` as its first argument, and the `Inst::AluRRR` changed in the
  next hunk reads `rs1` from `dst.to_reg()` instead of `t0.to_reg()`.
* emit.rs, Nand hunk: the `x2` temporary is removed. The `extract` call (still
  only emitted when `ty.bits() < 32`) takes `dst`, and the `And` always uses
  `dst.to_reg()` as `rs2`.
* emit.rs, `... | Umax | Smin | Smax` hunks: `extract` / `extract_sext` take
  `dst`; the compare built by the following call uses `dst` and `x` and now
  branches to the new label `label_select_dst`, where
  `Inst::gen_move(t0, dst.to_reg(), I64)` is emitted. The fall-through path
  still emits `Inst::gen_move(t0, x, I64)` and then an `Inst::Jal` to
  `label_select_done`.
* emit.rs, Xchg hunk: an `AtomicOP::extract(dst, offset, dst.to_reg(), ty)`
  sequence is emitted before the existing `Inst::Atomic`.
* issue5884.clif: new `test run` file for aarch64, x86_64, riscv64 and s390x.
  It stores an i64 into an 8-byte stack slot, then performs
  `atomic_rmw.i16 little or` at offset 6 of that slot and returns the result.
* NOTE(review): the last commit-message bullet presumably means "so s390x can
  pass the test"; it is left unchanged here -- confirm before rewording.

 .../codegen/src/isa/riscv64/inst/emit.rs | 33 +++++++++++--------
 .../filetests/runtests/issue5884.clif    | 20 +++++++++++
 2 files changed, 40 insertions(+), 13 deletions(-)
 create mode 100644 cranelift/filetests/filetests/runtests/issue5884.clif

diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
index 007a2c38ff..6ae0b541d7 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -1375,7 +1375,7 @@ impl MachInstEmit for Inst {
                     | crate::ir::AtomicRmwOp::And
                     | crate::ir::AtomicRmwOp::Or
                     | crate::ir::AtomicRmwOp::Xor => {
-                        AtomicOP::extract(t0, offset, dst.to_reg(), ty)
+                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                             .iter()
                             .for_each(|i| i.emit(&[], sink, emit_info, state));
                         Inst::AluRRR {
@@ -1388,7 +1388,7 @@ impl MachInstEmit for Inst {
                                 _ => unreachable!(),
                             },
                             rd: t0,
-                            rs1: t0.to_reg(),
+                            rs1: dst.to_reg(),
                             rs2: x,
                         }
                         .emit(&[], sink, emit_info, state);
@@ -1412,19 +1412,16 @@ impl MachInstEmit for Inst {
                         spilltmp_reg2()
                     }
                     crate::ir::AtomicRmwOp::Nand => {
-                        let x2 = if ty.bits() < 32 {
-                            AtomicOP::extract(t0, offset, dst.to_reg(), ty)
+                        if ty.bits() < 32 {
+                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                                 .iter()
                                 .for_each(|i| i.emit(&[], sink, emit_info, state));
-                            t0.to_reg()
-                        } else {
-                            dst.to_reg()
-                        };
+                        }
                         Inst::AluRRR {
                             alu_op: AluOPRRR::And,
                             rd: t0,
                             rs1: x,
-                            rs2: x2,
+                            rs2: dst.to_reg(),
                         }
                         .emit(&[], sink, emit_info, state);
                         Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state);
@@ -1456,12 +1453,13 @@ impl MachInstEmit for Inst {
                     | crate::ir::AtomicRmwOp::Umax
                     | crate::ir::AtomicRmwOp::Smin
                     | crate::ir::AtomicRmwOp::Smax => {
+                        let label_select_dst = sink.get_label();
                         let label_select_done = sink.get_label();
                         if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
                         {
-                            AtomicOP::extract(t0, offset, dst.to_reg(), ty)
+                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                         } else {
-                            AtomicOP::extract_sext(t0, offset, dst.to_reg(), ty)
+                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
                         }
                         .iter()
                         .for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -1473,9 +1471,9 @@ impl MachInstEmit for Inst {
                                 crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
                                 _ => unreachable!(),
                             },
-                            ValueRegs::one(t0.to_reg()),
+                            ValueRegs::one(dst.to_reg()),
                             ValueRegs::one(x),
-                            BranchTarget::Label(label_select_done),
+                            BranchTarget::Label(label_select_dst),
                             BranchTarget::zero(),
                             ty,
                         )
@@ -1483,6 +1481,12 @@ impl MachInstEmit for Inst {
                         .for_each(|i| i.emit(&[], sink, emit_info, state));
                         // here we select x.
                         Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state);
+                        Inst::Jal {
+                            dest: BranchTarget::Label(label_select_done),
+                        }
+                        .emit(&[], sink, emit_info, state);
+                        sink.bind_label(label_select_dst);
+                        Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state);
                         sink.bind_label(label_select_done);
                         Inst::Atomic {
                             op: AtomicOP::load_op(ty),
@@ -1504,6 +1508,9 @@ impl MachInstEmit for Inst {
                         spilltmp_reg2()
                     }
                     crate::ir::AtomicRmwOp::Xchg => {
+                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
+                            .iter()
+                            .for_each(|i| i.emit(&[], sink, emit_info, state));
                         Inst::Atomic {
                             op: AtomicOP::load_op(ty),
                             rd: writable_spilltmp_reg2(),
diff --git a/cranelift/filetests/filetests/runtests/issue5884.clif b/cranelift/filetests/filetests/runtests/issue5884.clif
new file mode 100644
index 0000000000..e3bea5e652
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/issue5884.clif
@@ -0,0 +1,20 @@
+test run
+target aarch64
+target x86_64
+target riscv64
+target s390x
+
+
+function %a(i64, i16) -> i16 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64, v1: i16):
+    v2 = stack_addr.i64 ss0+0
+    store little v0, v2
+
+    v3 = stack_addr.i64 ss0+6
+    v4 = atomic_rmw.i16 little or v3, v1
+    return v4
+}
+
+; run: %a(8608481011852310776, 0) == 30583
\ No newline at end of file