s390x: Add support for atomic operations (part 1)

This adds full back-end support for the Fence, AtomicLoad
and AtomicStore operations, and partial support for the
AtomicCas and AtomicRmw operations.

The missing pieces include sub-word operations, operations
on little-endian memory requiring byte-swapping, and some
of the subtypes of AtomicRmw -- everything that cannot be
implemented without a compare-and-swap loop.  This will be
done in a follow-up patch.

This patch already suffices to make the test suite green
again after a recent change that now requires atomic
operations when accessing the heap.
This commit is contained in:
Ulrich Weigand
2021-06-15 17:00:29 +02:00
parent a7dad4e38f
commit 46b73431ca
11 changed files with 1584 additions and 7 deletions

View File

@@ -181,6 +181,56 @@ pub fn mem_emit(
} }
} }
pub fn mem_rs_emit(
    rd: Reg,
    rn: Reg,
    mem: &MemArg,
    opcode_rs: Option<u16>,
    opcode_rsy: Option<u16>,
    add_trap: bool,
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
) {
    // Emit an RS- or RSY-format instruction (two register operands plus a
    // base+displacement memory operand). Which of the two encodings is used
    // depends on the finalized addressing mode: a 12-bit unsigned
    // displacement selects the RS opcode, a 20-bit signed displacement the
    // RSY opcode. Callers pass `None` for an encoding the instruction does
    // not support, and `mem_finalize` is told which forms are legal so it
    // only produces a reachable addressing mode.
    let (fixups, mem) = mem_finalize(
        mem,
        state,
        opcode_rs.is_some(),
        opcode_rsy.is_some(),
        false,
        false,
    );
    // Any address-materialization instructions must be emitted first.
    for fixup in fixups {
        fixup.emit(sink, emit_info, state);
    }
    // Record a trap site if this access may fault (e.g. heap bounds check).
    if add_trap && mem.can_trap() {
        let loc = state.cur_srcloc();
        if loc != SourceLoc::default() {
            sink.add_trap(loc, TrapCode::HeapOutOfBounds);
        }
    }
    match &mem {
        &MemArg::BXD12 {
            base, index, disp, ..
        } => {
            // RS/RSY formats have no index register field.
            assert!(index == zero_reg());
            let enc = enc_rs(opcode_rs.unwrap(), rd, rn, base, disp.bits());
            put(sink, &enc);
        }
        &MemArg::BXD20 {
            base, index, disp, ..
        } => {
            assert!(index == zero_reg());
            let enc = enc_rsy(opcode_rsy.unwrap(), rd, rn, base, disp.bits());
            put(sink, &enc);
        }
        // mem_finalize only returns BXD12/BXD20 given the flags above.
        _ => unreachable!(),
    }
}
pub fn mem_imm8_emit( pub fn mem_imm8_emit(
imm: u8, imm: u8,
mem: &MemArg, mem: &MemArg,
@@ -1301,6 +1351,53 @@ impl MachInstEmit for Inst {
); );
} }
&Inst::AtomicRmw {
alu_op,
rd,
rn,
ref mem,
} => {
let opcode = match alu_op {
ALUOp::Add32 => 0xebf8, // LAA
ALUOp::Add64 => 0xebe8, // LAAG
ALUOp::And32 => 0xebf4, // LAN
ALUOp::And64 => 0xebe4, // LANG
ALUOp::Orr32 => 0xebf6, // LAO
ALUOp::Orr64 => 0xebe6, // LAOG
ALUOp::Xor32 => 0xebf7, // LAX
ALUOp::Xor64 => 0xebe7, // LAXG
_ => unreachable!(),
};
let rd = rd.to_reg();
mem_rs_emit(
rd,
rn,
mem,
None,
Some(opcode),
true,
sink,
emit_info,
state,
);
}
&Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => {
let (opcode_rs, opcode_rsy) = match self {
&Inst::AtomicCas32 { .. } => (Some(0xba), Some(0xeb14)), // CS(Y)
&Inst::AtomicCas64 { .. } => (None, Some(0xeb30)), // CSG
_ => unreachable!(),
};
let rd = rd.to_reg();
mem_rs_emit(
rd, rn, mem, opcode_rs, opcode_rsy, true, sink, emit_info, state,
);
}
&Inst::Fence => {
put(sink, &enc_e(0x07e0));
}
&Inst::Load32 { rd, ref mem } &Inst::Load32 { rd, ref mem }
| &Inst::Load32ZExt8 { rd, ref mem } | &Inst::Load32ZExt8 { rd, ref mem }
| &Inst::Load32SExt8 { rd, ref mem } | &Inst::Load32SExt8 { rd, ref mem }

View File

@@ -2204,6 +2204,656 @@ fn test_s390x_binemit() {
"srag %r4, %r5, 524287(%r6)", "srag %r4, %r5, 524287(%r6)",
)); ));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080F8",
"laa %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FF8",
"laa %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080F8",
"laa %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FF8",
"laa %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080E8",
"laag %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FE8",
"laag %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080E8",
"laag %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Add64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FE8",
"laag %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080F4",
"lan %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FF4",
"lan %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080F4",
"lan %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FF4",
"lan %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080E4",
"lang %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FE4",
"lang %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080E4",
"lang %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::And64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FE4",
"lang %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080F6",
"lao %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FF6",
"lao %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080F6",
"lao %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FF6",
"lao %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080E6",
"laog %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FE6",
"laog %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080E6",
"laog %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Orr64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FE6",
"laog %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080F7",
"lax %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FF7",
"lax %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080F7",
"lax %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor32,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FF7",
"lax %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45000080E7",
"laxg %r4, %r5, -524288",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7FE7",
"laxg %r4, %r5, 524287",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB45600080E7",
"laxg %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicRmw {
alu_op: ALUOp::Xor64,
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7FE7",
"laxg %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD12 {
base: zero_reg(),
index: zero_reg(),
disp: UImm12::maybe_from_u64(0).unwrap(),
flags: MemFlags::trusted(),
},
},
"BA450000",
"cs %r4, %r5, 0",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD12 {
base: zero_reg(),
index: zero_reg(),
disp: UImm12::maybe_from_u64(4095).unwrap(),
flags: MemFlags::trusted(),
},
},
"BA450FFF",
"cs %r4, %r5, 4095",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB4500008014",
"csy %r4, %r5, -524288",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7F14",
"csy %r4, %r5, 524287",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD12 {
base: gpr(6),
index: zero_reg(),
disp: UImm12::maybe_from_u64(0).unwrap(),
flags: MemFlags::trusted(),
},
},
"BA456000",
"cs %r4, %r5, 0(%r6)",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD12 {
base: gpr(6),
index: zero_reg(),
disp: UImm12::maybe_from_u64(4095).unwrap(),
flags: MemFlags::trusted(),
},
},
"BA456FFF",
"cs %r4, %r5, 4095(%r6)",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB4560008014",
"csy %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicCas32 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7F14",
"csy %r4, %r5, 524287(%r6)",
));
insns.push((
Inst::AtomicCas64 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB4500008030",
"csg %r4, %r5, -524288",
));
insns.push((
Inst::AtomicCas64 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: zero_reg(),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB450FFF7F30",
"csg %r4, %r5, 524287",
));
insns.push((
Inst::AtomicCas64 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(-524288).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB4560008030",
"csg %r4, %r5, -524288(%r6)",
));
insns.push((
Inst::AtomicCas64 {
rd: writable_gpr(4),
rn: gpr(5),
mem: MemArg::BXD20 {
base: gpr(6),
index: zero_reg(),
disp: SImm20::maybe_from_i64(524287).unwrap(),
flags: MemFlags::trusted(),
},
},
"EB456FFF7F30",
"csg %r4, %r5, 524287(%r6)",
));
insns.push((Inst::Fence, "07E0", "bcr 14, 0"));
insns.push(( insns.push((
Inst::Load32 { Inst::Load32 {
rd: writable_gpr(1), rd: writable_gpr(1),

View File

@@ -404,6 +404,30 @@ pub enum Inst {
trap_code: TrapCode, trap_code: TrapCode,
}, },
/// An atomic read-modify-write operation with a memory in-/out operand,
/// a register destination, and a register source.
AtomicRmw {
alu_op: ALUOp,
rd: Writable<Reg>,
rn: Reg,
mem: MemArg,
},
/// A 32-bit atomic compare-and-swap operation.
AtomicCas32 {
rd: Writable<Reg>,
rn: Reg,
mem: MemArg,
},
/// A 64-bit atomic compare-and-swap operation.
AtomicCas64 {
rd: Writable<Reg>,
rn: Reg,
mem: MemArg,
},
/// A memory fence operation.
Fence,
/// A 32-bit load. /// A 32-bit load.
Load32 { Load32 {
rd: Writable<Reg>, rd: Writable<Reg>,
@@ -1190,6 +1214,24 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::CmpTrapRUImm16 { rn, .. } => { &Inst::CmpTrapRUImm16 { rn, .. } => {
collector.add_use(rn); collector.add_use(rn);
} }
&Inst::AtomicRmw {
rd, rn, ref mem, ..
} => {
collector.add_def(rd);
collector.add_use(rn);
memarg_regs(mem, collector);
}
&Inst::AtomicCas32 {
rd, rn, ref mem, ..
}
| &Inst::AtomicCas64 {
rd, rn, ref mem, ..
} => {
collector.add_mod(rd);
collector.add_use(rn);
memarg_regs(mem, collector);
}
&Inst::Fence => {}
&Inst::Load32 { rd, ref mem, .. } &Inst::Load32 { rd, ref mem, .. }
| &Inst::Load32ZExt8 { rd, ref mem, .. } | &Inst::Load32ZExt8 { rd, ref mem, .. }
| &Inst::Load32SExt8 { rd, ref mem, .. } | &Inst::Load32SExt8 { rd, ref mem, .. }
@@ -1589,6 +1631,38 @@ fn s390x_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, rn); map_use(mapper, rn);
} }
&mut Inst::AtomicRmw {
ref mut rd,
ref mut rn,
ref mut mem,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
map_mem(mapper, mem);
}
&mut Inst::AtomicCas32 {
ref mut rd,
ref mut rn,
ref mut mem,
..
} => {
map_mod(mapper, rd);
map_use(mapper, rn);
map_mem(mapper, mem);
}
&mut Inst::AtomicCas64 {
ref mut rd,
ref mut rn,
ref mut mem,
..
} => {
map_mod(mapper, rd);
map_use(mapper, rn);
map_mem(mapper, mem);
}
&mut Inst::Fence => {}
&mut Inst::Load32 { &mut Inst::Load32 {
ref mut rd, ref mut rd,
ref mut mem, ref mut mem,
@@ -2735,6 +2809,61 @@ impl Inst {
let cond = cond.show_rru(mb_rru); let cond = cond.show_rru(mb_rru);
format!("{}{} {}, {}", op, cond, rn, imm) format!("{}{} {}, {}", op, cond, rn, imm)
} }
&Inst::AtomicRmw {
alu_op,
rd,
rn,
ref mem,
} => {
let op = match alu_op {
ALUOp::Add32 => "laa",
ALUOp::Add64 => "laag",
ALUOp::And32 => "lan",
ALUOp::And64 => "lang",
ALUOp::Orr32 => "lao",
ALUOp::Orr64 => "laog",
ALUOp::Xor32 => "lax",
ALUOp::Xor64 => "laxg",
_ => unreachable!(),
};
let (mem_str, mem) =
mem_finalize_for_show(mem, mb_rru, state, false, true, false, false);
let rd = rd.to_reg().show_rru(mb_rru);
let rn = rn.show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem)
}
&Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => {
let (opcode_rs, opcode_rsy) = match self {
&Inst::AtomicCas32 { .. } => (Some("cs"), Some("csy")),
&Inst::AtomicCas64 { .. } => (None, Some("csg")),
_ => unreachable!(),
};
let (mem_str, mem) = mem_finalize_for_show(
mem,
mb_rru,
state,
opcode_rs.is_some(),
opcode_rsy.is_some(),
false,
false,
);
let op = match &mem {
&MemArg::BXD12 { .. } => opcode_rs,
&MemArg::BXD20 { .. } => opcode_rsy,
_ => unreachable!(),
};
let rd = rd.to_reg().show_rru(mb_rru);
let rn = rn.show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}, {}", mem_str, op.unwrap(), rd, rn, mem)
}
&Inst::Fence => "bcr 14, 0".to_string(),
&Inst::Load32 { rd, ref mem } &Inst::Load32 { rd, ref mem }
| &Inst::Load32ZExt8 { rd, ref mem } | &Inst::Load32ZExt8 { rd, ref mem }
| &Inst::Load32SExt8 { rd, ref mem } | &Inst::Load32SExt8 { rd, ref mem }

View File

@@ -33,6 +33,13 @@ fn ty_is_float(ty: Type) -> bool {
!ty_is_int(ty) !ty_is_int(ty)
} }
/// Returns true if `ty` is an integer type that atomic operations
/// (load/store/rmw/cas) can legally operate on: I8, I16, I32, or I64.
fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
    // `matches!` replaces the boolean-arm `match`; same semantics, idiomatic.
    matches!(ty, types::I8 | types::I16 | types::I32 | types::I64)
}
fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T { fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
let bits = ty_bits(ty); let bits = ty_bits(ty);
if bits <= 32 { if bits <= 32 {
@@ -2500,13 +2507,159 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// N.B.: the Ret itself is generated by the ABI. // N.B.: the Ret itself is generated by the ABI.
} }
Opcode::AtomicRmw Opcode::AtomicRmw => {
| Opcode::AtomicCas let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
| Opcode::AtomicLoad let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
| Opcode::AtomicStore let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
| Opcode::Fence => { let flags = ctx.memflags(insn).unwrap();
// TODO let endianness = flags.endianness(Endianness::Big);
panic!("Atomic operations not implemented"); let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
if endianness == Endianness::Little {
panic!("Little-endian atomic operations not implemented");
}
if ty_bits(ty) < 32 {
panic!("Sub-word atomic operations not implemented");
}
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
let (alu_op, rn) = match op {
AtomicRmwOp::And => (choose_32_64(ty, ALUOp::And32, ALUOp::And64), rn),
AtomicRmwOp::Or => (choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), rn),
AtomicRmwOp::Xor => (choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), rn),
AtomicRmwOp::Add => (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), rn),
AtomicRmwOp::Sub => {
let tmp_ty = choose_32_64(ty, types::I32, types::I64);
let tmp = ctx.alloc_tmp(tmp_ty).only_reg().unwrap();
let neg_op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64);
ctx.emit(Inst::UnaryRR {
op: neg_op,
rd: tmp,
rn,
});
(choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
}
_ => panic!("AtomicRmw operation type {:?} not implemented", op),
};
let mem = MemArg::reg(addr, flags);
ctx.emit(Inst::AtomicRmw {
alu_op,
rd,
rn,
mem,
});
}
Opcode::AtomicCas => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
let flags = ctx.memflags(insn).unwrap();
let endianness = flags.endianness(Endianness::Big);
let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
if endianness == Endianness::Little {
panic!("Little-endian atomic operations not implemented");
}
if ty_bits(ty) < 32 {
panic!("Sub-word atomic operations not implemented");
}
let mem = MemArg::reg(addr, flags);
ctx.emit(Inst::gen_move(rd, rm, ty));
if ty_bits(ty) == 32 {
ctx.emit(Inst::AtomicCas32 { rd, rn, mem });
} else {
ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
}
}
Opcode::AtomicLoad => {
let flags = ctx.memflags(insn).unwrap();
let endianness = flags.endianness(Endianness::Big);
let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
let mem = lower_address(ctx, &inputs[..], 0, flags);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if endianness == Endianness::Big {
ctx.emit(match ty_bits(ty) {
8 => Inst::Load32ZExt8 { rd, mem },
16 => Inst::Load32ZExt16 { rd, mem },
32 => Inst::Load32 { rd, mem },
64 => Inst::Load64 { rd, mem },
_ => panic!("Unsupported size in load"),
});
} else {
ctx.emit(match ty_bits(ty) {
8 => Inst::Load32ZExt8 { rd, mem },
16 => Inst::LoadRev16 { rd, mem },
32 => Inst::LoadRev32 { rd, mem },
64 => Inst::LoadRev64 { rd, mem },
_ => panic!("Unsupported size in load"),
});
}
}
Opcode::AtomicStore => {
let flags = ctx.memflags(insn).unwrap();
let endianness = flags.endianness(Endianness::Big);
let ty = ctx.input_ty(insn, 0);
assert!(is_valid_atomic_transaction_ty(ty));
let mem = lower_address(ctx, &inputs[1..], 0, flags);
if ty_bits(ty) <= 16 {
if let Some(imm) = input_matches_const(ctx, inputs[0]) {
ctx.emit(match (endianness, ty_bits(ty)) {
(_, 8) => Inst::StoreImm8 {
imm: imm as u8,
mem,
},
(Endianness::Big, 16) => Inst::StoreImm16 {
imm: imm as i16,
mem,
},
(Endianness::Little, 16) => Inst::StoreImm16 {
imm: (imm as i16).swap_bytes(),
mem,
},
_ => panic!("Unsupported size in store"),
});
} else {
let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(match (endianness, ty_bits(ty)) {
(_, 8) => Inst::Store8 { rd, mem },
(Endianness::Big, 16) => Inst::Store16 { rd, mem },
(Endianness::Little, 16) => Inst::StoreRev16 { rd, mem },
_ => panic!("Unsupported size in store"),
});
}
} else if endianness == Endianness::Big {
if let Some(imm) = input_matches_simm16(ctx, inputs[0]) {
ctx.emit(match ty_bits(ty) {
32 => Inst::StoreImm32SExt16 { imm, mem },
64 => Inst::StoreImm64SExt16 { imm, mem },
_ => panic!("Unsupported size in store"),
});
} else {
let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(match ty_bits(ty) {
32 => Inst::Store32 { rd, mem },
64 => Inst::Store64 { rd, mem },
_ => panic!("Unsupported size in store"),
});
}
} else {
let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(match ty_bits(ty) {
32 => Inst::StoreRev32 { rd, mem },
64 => Inst::StoreRev64 { rd, mem },
_ => panic!("Unsupported size in store"),
});
}
ctx.emit(Inst::Fence);
}
Opcode::Fence => {
ctx.emit(Inst::Fence);
} }
Opcode::RawBitcast Opcode::RawBitcast

View File

@@ -0,0 +1,25 @@
test compile
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_CAS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_cas_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = atomic_cas.i64 v2, v0, v1
return v3
}
; check: csg %r2, %r3, 0(%r4)
; nextln: br %r14
function %atomic_cas_i32(i32, i32, i64) -> i32 {
block0(v0: i32, v1: i32, v2: i64):
v3 = atomic_cas.i32 v2, v0, v1
return v3
}
; check: cs %r2, %r3, 0(%r4)
; nextln: br %r14

View File

@@ -0,0 +1,72 @@
test compile
target s390x
function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i64 little v0
return v1
}
; check: lrvg %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i64_sym() -> i64 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i64 little v0
return v1
}
; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1)
; nextln: br %r14
function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_load.i32 little v0
return v1
}
; check: lrv %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i32_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i32 little v0
return v1
}
; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
; nextln: br %r14
function %atomic_load_i16(i64) -> i16 {
block0(v0: i64):
v1 = atomic_load.i16 little v0
return v1
}
; check: lrvh %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i16_sym() -> i16 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i16 little v0
return v1
}
; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
; nextln: br %r14
function %atomic_load_i8(i64) -> i8 {
block0(v0: i64):
v1 = atomic_load.i8 little v0
return v1
}
; check: llc %r2, 0(%r2)
; nextln: br %r14

View File

@@ -0,0 +1,72 @@
test compile
target s390x
function %atomic_load_i64(i64) -> i64 {
block0(v0: i64):
v1 = atomic_load.i64 v0
return v1
}
; check: lg %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i64_sym() -> i64 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i64 v0
return v1
}
; check: lgrl %r2, %sym + 0
; nextln: br %r14
function %atomic_load_i32(i64) -> i32 {
block0(v0: i64):
v1 = atomic_load.i32 v0
return v1
}
; check: l %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i32_sym() -> i32 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i32 v0
return v1
}
; check: lrl %r2, %sym + 0
; nextln: br %r14
function %atomic_load_i16(i64) -> i16 {
block0(v0: i64):
v1 = atomic_load.i16 v0
return v1
}
; check: llh %r2, 0(%r2)
; nextln: br %r14
function %atomic_load_i16_sym() -> i16 {
gv0 = symbol colocated %sym
block0:
v0 = symbol_value.i64 gv0
v1 = atomic_load.i16 v0
return v1
}
; check: llhrl %r2, %sym + 0
; nextln: br %r14
function %atomic_load_i8(i64) -> i8 {
block0(v0: i64):
v1 = atomic_load.i8 v0
return v1
}
; check: llc %r2, 0(%r2)
; nextln: br %r14

View File

@@ -0,0 +1,114 @@
test compile
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_RMW (ADD)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_rmw_add_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 add v0, v1
return v2
}
; check: laag %r2, %r3, 0(%r2)
; nextln: br %r14
function %atomic_rmw_add_i32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 add v0, v1
return v2
}
; check: laa %r2, %r3, 0(%r2)
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_RMW (SUB)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_rmw_sub_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 sub v0, v1
return v2
}
; check: lcgr %r3, %r3
; nextln: laag %r2, %r3, 0(%r2)
; nextln: br %r14
function %atomic_rmw_sub_i32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 sub v0, v1
return v2
}
; check: lcr %r3, %r3
; nextln: laa %r2, %r3, 0(%r2)
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_RMW (AND)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_rmw_and_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 and v0, v1
return v2
}
; check: lang %r2, %r3, 0(%r2)
; nextln: br %r14
function %atomic_rmw_and_i32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 and v0, v1
return v2
}
; check: lan %r2, %r3, 0(%r2)
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_RMW (OR)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_rmw_or_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 or v0, v1
return v2
}
; check: laog %r2, %r3, 0(%r2)
; nextln: br %r14
function %atomic_rmw_or_i32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 or v0, v1
return v2
}
; check: lao %r2, %r3, 0(%r2)
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ATOMIC_RMW (XOR)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %atomic_rmw_xor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = atomic_rmw.i64 xor v0, v1
return v2
}
; check: laxg %r2, %r3, 0(%r2)
; nextln: br %r14
function %atomic_rmw_xor_i32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = atomic_rmw.i32 xor v0, v1
return v2
}
; check: lax %r2, %r3, 0(%r2)
; nextln: br %r14

View File

@@ -0,0 +1,125 @@
test compile
target s390x
function %atomic_store_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store.i64 little v0, v1
return
}
; check: strvg %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i64_sym(i64) {
gv0 = symbol colocated %sym
block0(v0: i64):
v1 = symbol_value.i64 gv0
atomic_store.i64 little v0, v1
return
}
; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i64(i64) {
block0(v0: i64):
v1 = iconst.i64 12345
atomic_store.i64 little v1, v0
return
}
; check: lghi %r3, 12345
; nextln: strvg %r3, 0(%r2)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store.i32 little v0, v1
return
}
; check: strv %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i32_sym(i32) {
gv0 = symbol colocated %sym
block0(v0: i32):
v1 = symbol_value.i64 gv0
atomic_store.i32 little v0, v1
return
}
; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i32(i64) {
block0(v0: i64):
v1 = iconst.i32 12345
atomic_store.i32 little v1, v0
return
}
; check: lhi %r3, 12345
; nextln: strv %r3, 0(%r2)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i16(i16, i64) {
block0(v0: i16, v1: i64):
atomic_store.i16 little v0, v1
return
}
; check: strvh %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i16_sym(i16) {
gv0 = symbol colocated %sym
block0(v0: i16):
v1 = symbol_value.i64 gv0
atomic_store.i16 little v0, v1
return
}
; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i16(i64) {
block0(v0: i64):
v1 = iconst.i16 12345
atomic_store.i16 little v1, v0
return
}
; check: mvhhi 0(%r2), 14640
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i8(i8, i64) {
block0(v0: i8, v1: i64):
atomic_store.i8 little v0, v1
return
}
; check: stc %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i8(i64) {
block0(v0: i64):
v1 = iconst.i8 123
atomic_store.i8 little v1, v0
return
}
; check: mvi 0(%r2), 123
; nextln: bcr 14, 0
; nextln: br %r14

View File

@@ -0,0 +1,123 @@
test compile
target s390x
function %atomic_store_i64(i64, i64) {
block0(v0: i64, v1: i64):
atomic_store.i64 v0, v1
return
}
; check: stg %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i64_sym(i64) {
gv0 = symbol colocated %sym
block0(v0: i64):
v1 = symbol_value.i64 gv0
atomic_store.i64 v0, v1
return
}
; check: stgrl %r2, %sym + 0
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i64(i64) {
block0(v0: i64):
v1 = iconst.i64 12345
atomic_store.i64 v1, v0
return
}
; check: mvghi 0(%r2), 12345
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i32(i32, i64) {
block0(v0: i32, v1: i64):
atomic_store.i32 v0, v1
return
}
; check: st %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i32_sym(i32) {
gv0 = symbol colocated %sym
block0(v0: i32):
v1 = symbol_value.i64 gv0
atomic_store.i32 v0, v1
return
}
; check: strl %r2, %sym + 0
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i32(i64) {
block0(v0: i64):
v1 = iconst.i32 12345
atomic_store.i32 v1, v0
return
}
; check: mvhi 0(%r2), 12345
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i16(i16, i64) {
block0(v0: i16, v1: i64):
atomic_store.i16 v0, v1
return
}
; check: sth %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i16_sym(i16) {
gv0 = symbol colocated %sym
block0(v0: i16):
v1 = symbol_value.i64 gv0
atomic_store.i16 v0, v1
return
}
; check: sthrl %r2, %sym + 0
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i16(i64) {
block0(v0: i64):
v1 = iconst.i16 12345
atomic_store.i16 v1, v0
return
}
; check: mvhhi 0(%r2), 12345
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_i8(i8, i64) {
block0(v0: i8, v1: i64):
atomic_store.i8 v0, v1
return
}
; check: stc %r2, 0(%r3)
; nextln: bcr 14, 0
; nextln: br %r14
function %atomic_store_imm_i8(i64) {
block0(v0: i64):
v1 = iconst.i8 123
atomic_store.i8 v1, v0
return
}
; check: mvi 0(%r2), 123
; nextln: bcr 14, 0
; nextln: br %r14

View File

@@ -0,0 +1,17 @@
test compile
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FENCE
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %fence() {
block0:
fence
return
}
; check: bcr 14, 0
; nextln: br %r14