diff --git a/cranelift/filetests/filetests/runtests/atomic-load-store.clif b/cranelift/filetests/filetests/runtests/atomic-load-store.clif
new file mode 100644
index 0000000000..356ba1dc0b
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/atomic-load-store.clif
@@ -0,0 +1,94 @@
+test interpret
+test run
+target x86_64
+target aarch64
+target riscv64
+target s390x
+
+function %i64_atomic_store_load(i64) -> i64 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i64 v0, v1
+    v2 = atomic_load.i64 v1
+    return v2
+}
+; run: %i64_atomic_store_load(0) == 0
+; run: %i64_atomic_store_load(-1) == -1
+; run: %i64_atomic_store_load(0x00000000_FFFFFFFF) == 0x00000000_FFFFFFFF
+; run: %i64_atomic_store_load(0xFFFFFFFF_00000000) == 0xFFFFFFFF_00000000
+; run: %i64_atomic_store_load(0xFEDCBA98_76543210) == 0xFEDCBA98_76543210
+; run: %i64_atomic_store_load(0xA00A00A0_0A00A00A) == 0xA00A00A0_0A00A00A
+; run: %i64_atomic_store_load(0xC0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF
+
+
+function %i32_atomic_store_load(i32) -> i32 {
+    ss0 = explicit_slot 4
+
+block0(v0: i32):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i32 v0, v1
+    v2 = atomic_load.i32 v1
+    return v2
+}
+; run: %i32_atomic_store_load(0) == 0
+; run: %i32_atomic_store_load(-1) == -1
+; run: %i32_atomic_store_load(0x0000FFFF) == 0x0000FFFF
+; run: %i32_atomic_store_load(0xFFFF0000) == 0xFFFF0000
+; run: %i32_atomic_store_load(0xFEDC3210) == 0xFEDC3210
+; run: %i32_atomic_store_load(0xA00AA00A) == 0xA00AA00A
+; run: %i32_atomic_store_load(0xC0FFEEEE) == 0xC0FFEEEE
+
+
+function %i16_atomic_store_load(i16) -> i16 {
+    ss0 = explicit_slot 2
+
+block0(v0: i16):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i16 v0, v1
+    v2 = atomic_load.i16 v1
+    return v2
+}
+; run: %i16_atomic_store_load(0) == 0
+; run: %i16_atomic_store_load(-1) == -1
+; run: %i16_atomic_store_load(0x00FF) == 0x00FF
+; run: %i16_atomic_store_load(0xFF00) == 0xFF00
+; run: %i16_atomic_store_load(0xFE10) == 0xFE10
+; run: %i16_atomic_store_load(0xA00A) == 0xA00A
+; run: %i16_atomic_store_load(0xC0FF) == 0xC0FF
+
+
+function %i8_atomic_store_load(i8) -> i8 {
+    ss0 = explicit_slot 1
+
+block0(v0: i8):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i8 v0, v1
+    v2 = atomic_load.i8 v1
+    return v2
+}
+; run: %i8_atomic_store_load(0) == 0
+; run: %i8_atomic_store_load(-1) == -1
+; run: %i8_atomic_store_load(0x0F) == 0x0F
+; run: %i8_atomic_store_load(0xF0) == 0xF0
+; run: %i8_atomic_store_load(0xAA) == 0xAA
+; run: %i8_atomic_store_load(0xC0) == 0xC0
+
+
+function %atomic_store_load_aligned(i64) -> i64 {
+    ss0 = explicit_slot 16
+
+block0(v0: i64):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i64 aligned v0, v1
+    v2 = atomic_load.i64 aligned v1
+    return v2
+}
+; run: %atomic_store_load_aligned(0) == 0
+; run: %atomic_store_load_aligned(-1) == -1
+; run: %atomic_store_load_aligned(0x00000000_FFFFFFFF) == 0x00000000_FFFFFFFF
+; run: %atomic_store_load_aligned(0xFFFFFFFF_00000000) == 0xFFFFFFFF_00000000
+; run: %atomic_store_load_aligned(0xFEDCBA98_76543210) == 0xFEDCBA98_76543210
+; run: %atomic_store_load_aligned(0xA00A00A0_0A00A00A) == 0xA00A00A0_0A00A00A
+; run: %atomic_store_load_aligned(0xC0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF
diff --git a/cranelift/filetests/filetests/runtests/fence.clif b/cranelift/filetests/filetests/runtests/fence.clif
new file mode 100644
index 0000000000..e27c90e60b
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fence.clif
@@ -0,0 +1,18 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target riscv64
+
+; Check that the fence instruction doesn't crash. Testing anything else would
+; require multiple threads, which requires a runtime like Wasmtime.
+
+function %fence() -> i8 {
+block0:
+    fence
+
+    v0 = iconst.i8 0
+    return v0
+}
+; run: %fence() == 0
diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs
index d5b51c5c59..3e2af2e911 100644
--- a/cranelift/fuzzgen/src/function_generator.rs
+++ b/cranelift/fuzzgen/src/function_generator.rs
@@ -2,7 +2,6 @@ use crate::codegen::ir::{ArgumentExtension, ArgumentPurpose};
 use crate::config::Config;
 use anyhow::Result;
 use arbitrary::{Arbitrary, Unstructured};
-use cranelift::codegen::ir::immediates::Offset32;
 use cranelift::codegen::ir::instructions::InstructionFormat;
 use cranelift::codegen::ir::stackslot::StackSize;
 use cranelift::codegen::ir::{types::*, FuncRef, LibCall, UserExternalName, UserFuncName};
@@ -223,27 +222,73 @@ fn insert_load_store(
 ) -> Result<()> {
     let ctrl_type = *rets.first().or(args.first()).unwrap();
     let type_size = ctrl_type.bytes();
-    let (address, offset) = fgen.generate_load_store_address(builder, type_size)?;
 
-    // TODO: More advanced MemFlags
-    let flags = MemFlags::new();
+    // Should we generate an aligned address
+    let is_atomic = [Opcode::AtomicLoad, Opcode::AtomicStore].contains(&opcode);
+    let is_aarch64 = matches!(fgen.target_triple.architecture, Architecture::Aarch64(_));
+    let aligned = if is_atomic && is_aarch64 {
+        // AArch64 has issues with unaligned atomics.
+        // https://github.com/bytecodealliance/wasmtime/issues/5483
+        true
+    } else {
+        bool::arbitrary(fgen.u)?
+    };
+
+    let mut flags = MemFlags::new();
+    // Even if we picked an aligned address, we can always generate unaligned memflags
+    if aligned && bool::arbitrary(fgen.u)? {
+        flags.set_aligned();
+    }
+    // If the address is aligned, then we know it won't trap
+    if aligned && bool::arbitrary(fgen.u)? {
+        flags.set_notrap();
+    }
+
+    let (address, max_offset) = fgen.generate_load_store_address(builder, type_size, aligned)?;
+
+    // Pick an offset to pass into the load/store.
+    let offset = if aligned {
+        0
+    } else {
+        fgen.u.int_in_range(0..=max_offset)? as i32
+    }
+    .into();
 
     // The variable being loaded or stored into
     let var = fgen.get_variable_of_type(ctrl_type)?;
 
-    if opcode.can_store() {
-        let val = builder.use_var(var);
+    match opcode.format() {
+        InstructionFormat::LoadNoOffset => {
+            let (inst, dfg) = builder
+                .ins()
+                .LoadNoOffset(opcode, ctrl_type, flags, address);
 
-        builder
-            .ins()
-            .Store(opcode, ctrl_type, flags, offset, val, address);
-    } else {
-        let (inst, dfg) = builder
-            .ins()
-            .Load(opcode, ctrl_type, flags, offset, address);
+            let new_val = dfg.first_result(inst);
+            builder.def_var(var, new_val);
+        }
+        InstructionFormat::StoreNoOffset => {
+            let val = builder.use_var(var);
 
-        let new_val = dfg.first_result(inst);
-        builder.def_var(var, new_val);
+            builder
+                .ins()
+                .StoreNoOffset(opcode, ctrl_type, flags, val, address);
+        }
+        InstructionFormat::Store => {
+            let val = builder.use_var(var);
+
+            builder
+                .ins()
+                .Store(opcode, ctrl_type, flags, offset, val, address);
+        }
+        InstructionFormat::Load => {
+            let (inst, dfg) = builder
+                .ins()
+                .Load(opcode, ctrl_type, flags, offset, address);
+
+            let new_val = dfg.first_result(inst);
+            builder.def_var(var, new_val);
+        }
+        _ => unimplemented!(),
     }
 
     Ok(())
@@ -1038,6 +1083,8 @@ const OPCODE_SIGNATURES: &[OpcodeSignature] = &[
     (Opcode::Icmp, &[I32, I32], &[I8], insert_cmp),
     (Opcode::Icmp, &[I64, I64], &[I8], insert_cmp),
     (Opcode::Icmp, &[I128, I128], &[I8], insert_cmp),
+    // Fence
+    (Opcode::Fence, &[], &[], insert_opcode),
     // Stack Access
     (Opcode::StackStore, &[I8], &[], insert_stack_store),
     (Opcode::StackStore, &[I16], &[], insert_stack_store),
@@ -1077,6 +1124,11 @@ const OPCODE_SIGNATURES: &[OpcodeSignature] = &[
     // Opcode::Sload16x4
     // Opcode::Uload32x2
     // Opcode::Sload32x2
+    // AtomicLoad
+    (Opcode::AtomicLoad, &[], &[I8], insert_load_store),
+    (Opcode::AtomicLoad, &[], &[I16], insert_load_store),
+    (Opcode::AtomicLoad, &[], &[I32], insert_load_store),
+    (Opcode::AtomicLoad, &[], &[I64], insert_load_store),
     // Stores
     (Opcode::Store, &[I8], &[], insert_load_store),
     (Opcode::Store, &[I16], &[], insert_load_store),
@@ -1092,6 +1144,11 @@ const OPCODE_SIGNATURES: &[OpcodeSignature] = &[
     (Opcode::Istore16, &[I32], &[], insert_load_store),
     (Opcode::Istore16, &[I64], &[], insert_load_store),
     (Opcode::Istore32, &[I64], &[], insert_load_store),
+    // AtomicStore
+    (Opcode::AtomicStore, &[I8], &[], insert_load_store),
+    (Opcode::AtomicStore, &[I16], &[], insert_load_store),
+    (Opcode::AtomicStore, &[I32], &[], insert_load_store),
+    (Opcode::AtomicStore, &[I64], &[], insert_load_store),
     // Bitcast
     (Opcode::Bitcast, &[F32], &[I32], insert_bitcast),
     (Opcode::Bitcast, &[I32], &[F32], insert_bitcast),
@@ -1277,34 +1334,42 @@ where
     /// we don't run the risk of returning them from a function, which would make the fuzzer
     /// complain since they are different from the interpreter to the backend.
     ///
-    /// The address is not guaranteed to be valid, but there's a chance that it is.
+    /// `min_size`: Controls the amount of space that the address should have.
     ///
-    /// `min_size`: Controls the amount of space that the address should have.This is not
-    /// guaranteed to be respected
+    /// `aligned`: When passed as true, the resulting address is guaranteed to be aligned
+    /// on an 8 byte boundary.
+    ///
+    /// Returns a valid address and the maximum possible offset that still respects `min_size`.
     fn generate_load_store_address(
         &mut self,
         builder: &mut FunctionBuilder,
         min_size: u32,
-    ) -> Result<(Value, Offset32)> {
+        aligned: bool,
+    ) -> Result<(Value, u32)> {
         // TODO: Currently our only source of addresses is stack_addr, but we
         // should add global_value, symbol_value eventually
         let (addr, available_size) = {
             let (ss, slot_size) = self.stack_slot_with_size(min_size)?;
-            let max_offset = slot_size.saturating_sub(min_size);
-            let offset = self.u.int_in_range(0..=max_offset)? as i32;
-            let base_addr = builder.ins().stack_addr(I64, ss, offset);
-            let available_size = (slot_size as i32).saturating_sub(offset);
+
+            // stack_slot_with_size guarantees that slot_size >= min_size
+            let max_offset = slot_size - min_size;
+            let offset = if aligned {
+                self.u.int_in_range(0..=max_offset / min_size)? * min_size
+            } else {
+                self.u.int_in_range(0..=max_offset)?
+            };
+
+            let base_addr = builder.ins().stack_addr(I64, ss, offset as i32);
+            let available_size = slot_size.saturating_sub(offset);
             (base_addr, available_size)
         };
 
         // TODO: Insert a bunch of amode opcodes here to modify the address!
 
         // Now that we have an address and a size, we just choose a random offset to return to the
-        // caller. Try to preserve min_size bytes.
-        let max_offset = available_size.saturating_sub(min_size as i32);
-        let offset = self.u.int_in_range(0..=max_offset)? as i32;
-
-        Ok((addr, offset.into()))
+        // caller. Preserving min_size bytes.
+        let max_offset = available_size.saturating_sub(min_size);
+        Ok((addr, max_offset))
     }
 
     /// Get a variable of type `ty` from the current function
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 8696c57ab5..365efc9634 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -1162,9 +1162,27 @@ where
         Opcode::Iconcat => assign(Value::concat(arg(0)?, arg(1)?)?),
         Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
         Opcode::AtomicCas => unimplemented!("AtomicCas"),
-        Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
-        Opcode::AtomicStore => unimplemented!("AtomicStore"),
-        Opcode::Fence => unimplemented!("Fence"),
+        Opcode::AtomicLoad => {
+            let load_ty = inst_context.controlling_type().unwrap();
+            let addr = arg(0)?.into_int()? as u64;
+            // We are doing a regular load here, this isn't actually thread safe.
+            assign_or_memtrap(
+                Address::try_from(addr).and_then(|addr| state.checked_load(addr, load_ty)),
+            )
+        }
+        Opcode::AtomicStore => {
+            let val = arg(0)?;
+            let addr = arg(1)?.into_int()? as u64;
+            // We are doing a regular store here, this isn't actually thread safe.
+            continue_or_memtrap(
+                Address::try_from(addr).and_then(|addr| state.checked_store(addr, val)),
+            )
+        }
+        Opcode::Fence => {
+            // The interpreter always runs in a single threaded context, so we don't
+            // actually need to emit a fence here.
+            ControlFlow::Continue
+        }
         Opcode::WideningPairwiseDotProductS => {
             let ctrl_ty = types::I16X8;
             let new_type = ctrl_ty.merge_lanes().unwrap();
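
For reference, a minimal standalone sketch (not part of the patch) of the aligned-offset arithmetic that `generate_load_store_address` uses above; `pick(n)` and `aligned_offset` are hypothetical names, with `pick(n)` standing in for arbitrary's `Unstructured::int_in_range(0..=n)`:

    // Illustrative only: pick an offset that is a multiple of the access size, so that
    // base + offset keeps the access aligned (assuming the slot base is itself aligned,
    // as the patch's doc comment states) while leaving at least `min_size` bytes in bounds.
    fn aligned_offset(slot_size: u32, min_size: u32, pick: impl Fn(u32) -> u32) -> u32 {
        // stack_slot_with_size guarantees slot_size >= min_size.
        let max_offset = slot_size - min_size;
        pick(max_offset / min_size) * min_size
    }

    // e.g. slot_size = 16, min_size = 8: pick(1) yields 0 or 1, so the offset is 0 or 8.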