diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 04cbc87065..ffded34f95 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4600,7 +4600,8 @@ pub(crate) fn define(
             r#"
         Atomically load from memory at `p`.
 
-        It should only be used for integer types with 32 or 64 bits.
+        This is a polymorphic instruction that can load any value type which has a memory
+        representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
         This operation is sequentially consistent and creates happens-before edges that order
         normal (non-atomic) loads and stores.
         "#,
@@ -4612,124 +4613,14 @@ pub(crate) fn define(
         .other_side_effects(true),
     );
 
-    ig.push(
-        Inst::new(
-            "atomic_uload8",
-            r#"
-        Atomically load 8 bits from memory at `p` and zero-extend to either 32 or 64 bits.
-
-        This is equivalent to ``load.i8`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_uload16",
-            r#"
-        Atomically load 16 bits from memory at `p` and zero-extend to either 32 or 64 bits.
-
-        This is equivalent to ``load.i16`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates
-        happens-before edges that order normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_uload32",
-            r#"
-        Atomically load 32 bits from memory at `p` and zero-extend to 64 bits.
-
-        This is equivalent to ``load.i32`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates
-        happens-before edges that order normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
     ig.push(
         Inst::new(
             "atomic_store",
             r#"
         Atomically store `x` to memory at `p`.
 
-        This is a polymorphic instruction that can store a 32 or 64-bit value.
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store8",
-            r#"
-        Atomically store the low 8 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store16",
-            r#"
-        Atomically store the low 16 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store32",
-            r#"
-        Atomically store the low 32 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
-
+        This is a polymorphic instruction that can store any value type with a memory
+        representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
         This operation is sequentially consistent and creates happens-before edges that order
         normal (non-atomic) loads and stores.
         "#,
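None of the zero-extending or truncating behaviour is lost: a producer that previously emitted one of the removed instructions now writes the equivalent two-instruction sequence that the removed docs already named, and the updated filetests below check exactly these shapes. An illustrative CLIF sketch (function names invented for this note; not part of the patch):

function %uload16_equiv(i64) -> i64 {
block0(v0: i64):
    v1 = atomic_load.i16 v0      ; formerly: v1 = atomic_uload16.i64 v0
    v2 = uextend.i64 v1
    return v2
}

function %store8_equiv(i32, i64) {
block0(v0: i32, v1: i64):
    v2 = ireduce.i8 v0           ; formerly: atomic_store8.i32 v0, v1
    atomic_store.i8 v2, v1
    return
}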
- "#, - &formats.store_no_offset, - ) - .operands_in(vec![MemFlags, x, p]) - .can_store(true) - .other_side_effects(true), - ); - - ig.push( - Inst::new( - "atomic_store32", - r#" - Atomically store the low 32 bits of `x` to memory at `p`. - - This is equivalent to ``ireduce.i32`` followed by ``store.i32``. - + This is a polymorphic instruction that can store any value type with a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. This operation is sequentially consistent and creates happens-before edges that order normal (non-atomic) loads and stores. "#, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 12535cf382..6e64654568 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1740,6 +1740,22 @@ pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool { } } +pub(crate) fn emit_atomic_load>( + ctx: &mut C, + rt: Writable, + insn: IRInst, +) { + assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad); + let inputs = insn_inputs(ctx, insn); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let access_ty = ctx.output_ty(insn, 0); + assert!(is_valid_atomic_transaction_ty(access_ty)); + // We're ignoring the result type of the load because the LoadAcquire will + // explicitly zero extend to the nearest word, and also zero the high half + // of an X register. + ctx.emit(Inst::LoadAcquire { access_ty, rt, rn }); +} + fn load_op_to_ty(op: Opcode) -> Option { match op { Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8), diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 8a4df2026b..3ddc3712a0 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -521,6 +521,19 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::Uextend | Opcode::Sextend => { + if op == Opcode::Uextend { + let inputs = ctx.get_input_as_source_or_const(inputs[0].insn, inputs[0].input); + if let Some((atomic_load, 0)) = inputs.inst { + if ctx.data(atomic_load).opcode() == Opcode::AtomicLoad { + let output_ty = ty.unwrap(); + assert!(output_ty == I32 || output_ty == I64); + let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + emit_atomic_load(ctx, rt, atomic_load); + ctx.sink_inst(atomic_load); + return Ok(()); + } + } + } let output_ty = ty.unwrap(); let input_ty = ctx.input_ty(insn, 0); let from_bits = ty_bits(input_ty) as u8; @@ -1522,38 +1535,15 @@ pub(crate) fn lower_insn_to_regs>( } } - Opcode::AtomicLoad - | Opcode::AtomicUload8 - | Opcode::AtomicUload16 - | Opcode::AtomicUload32 => { + Opcode::AtomicLoad => { let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let ty = ty.unwrap(); - let access_ty = match op { - Opcode::AtomicLoad => ty, - Opcode::AtomicUload8 => I8, - Opcode::AtomicUload16 => I16, - Opcode::AtomicUload32 => I32, - _ => panic!(), - }; - assert!(is_valid_atomic_transaction_ty(access_ty)); - ctx.emit(Inst::LoadAcquire { access_ty, rt, rn }); + emit_atomic_load(ctx, rt, insn); } - Opcode::AtomicStore - | Opcode::AtomicStore32 - | Opcode::AtomicStore16 - | Opcode::AtomicStore8 => { + Opcode::AtomicStore => { let rt = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let ty = ctx.input_ty(insn, 0); - let 
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index 8ff375788a..b13edc4bb2 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2734,61 +2734,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
             }
         }
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => {
+        Opcode::AtomicLoad => {
             let flags = ctx.memflags(insn).unwrap();
             let endianness = flags.endianness(Endianness::Big);
             let ty = ty.unwrap();
-            let access_ty = match op {
-                Opcode::AtomicLoad => ty,
-                Opcode::AtomicUload8 => types::I8,
-                Opcode::AtomicUload16 => types::I16,
-                Opcode::AtomicUload32 => types::I32,
-                _ => unreachable!(),
-            };
-            assert!(is_valid_atomic_transaction_ty(access_ty));
+            assert!(is_valid_atomic_transaction_ty(ty));
             let mem = lower_address(ctx, &inputs[..], 0, flags);
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             if endianness == Endianness::Big {
-                ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
-                    (8, 32) => Inst::Load32ZExt8 { rd, mem },
-                    (8, 64) => Inst::Load64ZExt8 { rd, mem },
-                    (16, 32) => Inst::Load32ZExt16 { rd, mem },
-                    (16, 64) => Inst::Load64ZExt16 { rd, mem },
-                    (32, 32) => Inst::Load32 { rd, mem },
-                    (32, 64) => Inst::Load64ZExt32 { rd, mem },
-                    (64, 64) => Inst::Load64 { rd, mem },
+                ctx.emit(match ty_bits(ty) {
+                    8 => Inst::Load32ZExt8 { rd, mem },
+                    16 => Inst::Load32ZExt16 { rd, mem },
+                    32 => Inst::Load32 { rd, mem },
+                    64 => Inst::Load64 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
             } else {
-                ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
-                    (8, 32) => Inst::Load32ZExt8 { rd, mem },
-                    (8, 64) => Inst::Load64ZExt8 { rd, mem },
-                    (16, 32) => Inst::LoadRev16 { rd, mem },
-                    (32, 32) => Inst::LoadRev32 { rd, mem },
-                    (64, 64) => Inst::LoadRev64 { rd, mem },
+                ctx.emit(match ty_bits(ty) {
+                    8 => Inst::Load32ZExt8 { rd, mem },
+                    16 => Inst::LoadRev16 { rd, mem },
+                    32 => Inst::LoadRev32 { rd, mem },
+                    64 => Inst::LoadRev64 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
             }
         }
-        Opcode::AtomicStore
-        | Opcode::AtomicStore32
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore8 => {
+        Opcode::AtomicStore => {
             let flags = ctx.memflags(insn).unwrap();
             let endianness = flags.endianness(Endianness::Big);
-            let data_ty = ctx.input_ty(insn, 0);
-            let ty = match op {
-                Opcode::AtomicStore => data_ty,
-                Opcode::AtomicStore32 => types::I32,
-                Opcode::AtomicStore16 => types::I16,
-                Opcode::AtomicStore8 => types::I8,
-                _ => unreachable!(),
-            };
+            let ty = ctx.input_ty(insn, 0);
             assert!(is_valid_atomic_transaction_ty(ty));
             let mem = lower_address(ctx, &inputs[1..], 0, flags);
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 19433dc71e..b4c05cee8f 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -5825,10 +5825,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
         }
 
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => {
+        Opcode::AtomicLoad => {
             // This is a normal load. The x86-TSO memory model provides sufficient sequencing
             // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
             // need for any fence instructions.
@@ -5850,21 +5847,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
 
-        Opcode::AtomicStore
-        | Opcode::AtomicStore32
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore8 => {
+        Opcode::AtomicStore => {
             // This is a normal store, followed by an `mfence` instruction.
             let data = put_input_in_reg(ctx, inputs[0]);
             let addr = lower_to_amode(ctx, inputs[1], 0);
-            let data_ty = ctx.input_ty(insn, 0);
-            let ty_access = match op {
-                Opcode::AtomicStore => data_ty,
-                Opcode::AtomicStore32 => types::I32,
-                Opcode::AtomicStore16 => types::I16,
-                Opcode::AtomicStore8 => types::I8,
-                _ => unreachable!(),
-            };
+            let ty_access = ctx.input_ty(insn, 0);
             assert!(is_valid_atomic_transaction_ty(ty_access));
 
             ctx.emit(Inst::store(ty_access, data, addr));
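On x64 the same shape falls out of x86-TSO: a sequentially consistent atomic_load lowers to a plain (zero-extending) mov with no fence, and an atomic_store to a plain mov followed by mfence; narrow widths now arrive as ordinary i8/i16 accesses instead of dedicated opcodes. An illustrative sketch of the store side (assumed lowering per the code above; not one of this patch's filetests):

function %seq_cst_store_i8(i32, i64) {
block0(v0: i32, v1: i64):
    v2 = ireduce.i8 v0           ; no machine instruction needed for the truncation
    atomic_store.i8 v2, v1       ; expected: a byte-sized mov, then mfence
    return
}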
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
index 31af721015..cc69eef05c 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
@@ -21,50 +21,75 @@ block0(v0: i64):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i32_i64(i64) -> i64 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload32.i64 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
+; check: ldarh w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i8(i64) -> i8 {
+block0(v0: i64):
+    v1 = atomic_load.i8 v0
+    return v1
+}
+
+; check: ldarb w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i32_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = atomic_load.i32 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
 ; check: ldar w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i16_i32(i64) -> i32 {
+function %atomic_load_i16_i64(i64) -> i64 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 v0
-    return v1
+    v1 = atomic_load.i16 v0
+    v2 = uextend.i64 v1
+    return v2
 }
 
 ; check: ldarh w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i16_i64(i64) -> i64 {
+function %atomic_load_i8_i64(i64) -> i64 {
 block0(v0: i64):
-    v1 = atomic_uload16.i64 v0
-    return v1
+    v1 = atomic_load.i8 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; check: ldarb w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i16_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = atomic_load.i16 v0
+    v2 = uextend.i32 v1
+    return v2
 }
 
 ; check: ldarh w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i8_i32(i64) -> i32 {
+function %atomic_load_i8_i32(i64) -> i32 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 v0
-    return v1
-}
-
-; check: ldarb w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-
-function %atomic_uload_i8_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = atomic_uload8.i64 v0
-    return v1
+    v1 = atomic_load.i8 v0
+    v2 = uextend.i32 v1
+    return v2
 }
 
 ; check: ldarb w0, [x0]
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
index 9c0cd529d7..89d3745278 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
@@ -21,52 +21,82 @@ block0(v0: i32, v1: i64):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i32_i64(i64, i64) {
-block0(v0: i64, v1: i64):
-    atomic_store32.i64 v0, v1
+function %atomic_store_i16(i16, i64) {
+block0(v0: i16, v1: i64):
+    atomic_store.i16 v0, v1
     return
 }
 
+; check: stlrh w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i8(i8, i64) {
+block0(v0: i8, v1: i64):
+    atomic_store.i8 v0, v1
+    return
+}
+
+; check: stlrb w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i64_i32(i64, i64) {
+block0(v0: i64, v1: i64):
+    v2 = ireduce.i32 v0
+    atomic_store.i32 v2, v1
+    return
+}
+
+; check-not: uxt
 ; check: stlr w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i16_i32(i32, i64) {
-block0(v0: i32, v1: i64):
-    atomic_store16.i32 v0, v1
+function %atomic_store_i64_i16(i64, i64) {
+block0(v0: i64, v1: i64):
+    v2 = ireduce.i16 v0
+    atomic_store.i16 v2, v1
     return
 }
 
+; check-not: uxt
 ; check: stlrh w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i16_i64(i64, i64) {
+function %atomic_store_i64_i8(i64, i64) {
 block0(v0: i64, v1: i64):
-    atomic_store16.i64 v0, v1
+    v2 = ireduce.i8 v0
+    atomic_store.i8 v2, v1
     return
 }
 
+; check-not: uxt
+; check: stlrb w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i32_i16(i32, i64) {
+block0(v0: i32, v1: i64):
+    v2 = ireduce.i16 v0
+    atomic_store.i16 v2, v1
+    return
+}
+
+; check-not: uxt
 ; check: stlrh w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i8_i32(i32, i64) {
+function %atomic_store_i32_i8(i32, i64) {
 block0(v0: i32, v1: i64):
-    atomic_store8.i32 v0, v1
-    return
-}
-
-; check: stlrb w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-
-function %atomic_ustore_i8_i64(i64, i64) {
-block0(v0: i64, v1: i64):
-    atomic_store8.i64 v0, v1
+    v2 = ireduce.i8 v0
+    atomic_store.i8 v2, v1
     return
 }
 
+; check-not: uxt
 ; check: stlrb w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
index 629c432370..5556176bbb 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
@@ -41,29 +41,29 @@ block0:
 ; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
 ; nextln: br %r14
 
-function %atomic_load_i16(i64) -> i32 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 little v0
+    v1 = atomic_load.i16 little v0
     return v1
 }
 
 ; check: lrvh %r2, 0(%r2)
 ; nextln: br %r14
 
-function %atomic_load_i16_sym() -> i32 {
+function %atomic_load_i16_sym() -> i16 {
 gv0 = symbol colocated %sym
 block0:
     v0 = symbol_value.i64 gv0
-    v1 = atomic_uload16.i32 little v0
+    v1 = atomic_load.i16 little v0
     return v1
 }
 
 ; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
 ; nextln: br %r14
 
-function %atomic_load_i8(i64) -> i32 {
+function %atomic_load_i8(i64) -> i8 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 little v0
+    v1 = atomic_load.i8 little v0
     return v1
 }
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
index 9a58de52d1..b361aaa4c7 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
@@ -41,29 +41,29 @@ block0:
 ; check: lrl %r2, %sym + 0
 ; nextln: br %r14
 
-function %atomic_load_i16(i64) -> i32 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
 ; check: llh %r2, 0(%r2)
 ; nextln: br %r14
 
-function %atomic_load_i16_sym() -> i32 {
+function %atomic_load_i16_sym() -> i16 {
 gv0 = symbol colocated %sym
 block0:
     v0 = symbol_value.i64 gv0
-    v1 = atomic_uload16.i32 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
 ; check: llhrl %r2, %sym + 0
 ; nextln: br %r14
 
-function %atomic_load_i8(i64) -> i32 {
+function %atomic_load_i8(i64) -> i8 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 v0
+    v1 = atomic_load.i8 v0
     return v1
 }
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 2b892de57c..c9c0372980 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -625,14 +625,8 @@ where
         Opcode::Iconcat => assign(Value::concat(arg(0)?, arg(1)?)?),
         Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
         Opcode::AtomicCas => unimplemented!("AtomicCas"),
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => unimplemented!("AtomicLoad"),
-        Opcode::AtomicStore
-        | Opcode::AtomicStore8
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore32 => unimplemented!("AtomicStore"),
+        Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
+        Opcode::AtomicStore => unimplemented!("AtomicStore"),
         Opcode::Fence => unimplemented!("Fence"),
         Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
         Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),