diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index f81ccc88bb..a5803cbcf6 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -1,7 +1,6 @@ //! A frontend for building Cranelift IR from other languages. use crate::ssa::{SSABlock, SSABuilder, SideEffects}; use crate::variable::Variable; -use alloc::vec::Vec; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::entity::{EntitySet, SecondaryMap}; use cranelift_codegen::ir; @@ -626,8 +625,15 @@ impl<'a> FunctionBuilder<'a> { self.ins().call(libc_memcpy, &[dest, src, size]); } - /// Optimised memcpy for small copies. - pub fn emit_small_memcpy( + /// Optimised memcpy or memmove for small copies. + /// + /// # Codegen safety + /// + /// The following properties must hold to prevent UB: + /// + /// * `src_align` and `dest_align` are an upper-bound on the alignment of `src` respectively `dest`. + /// * If `non_overlapping` is true, then this must be correct. + pub fn emit_small_memory_copy( &mut self, config: TargetFrontendConfig, dest: Value, @@ -635,6 +641,7 @@ impl<'a> FunctionBuilder<'a> { size: u64, dest_align: u8, src_align: u8, + non_overlapping: bool, ) { // Currently the result of guess work, not actual profiling. const THRESHOLD: u64 = 4; @@ -663,16 +670,27 @@ impl<'a> FunctionBuilder<'a> { if load_and_store_amount > THRESHOLD { let size_value = self.ins().iconst(config.pointer_type(), size as i64); - self.call_memcpy(config, dest, src, size_value); + if non_overlapping { + self.call_memcpy(config, dest, src, size_value); + } else { + self.call_memmove(config, dest, src, size_value); + } return; } let mut flags = MemFlags::new(); flags.set_aligned(); - for i in 0..load_and_store_amount { - let offset = (access_size * i) as i32; - let value = self.ins().load(int_type, flags, src, offset); + // Load all of the memory first. This is necessary in case `dest` overlaps. + // It can also improve performance a bit. + let registers: smallvec::SmallVec<[_; THRESHOLD as usize]> = (0..load_and_store_amount) + .map(|i| { + let offset = (access_size * i) as i32; + (self.ins().load(int_type, flags, src, offset), offset) + }) + .collect(); + + for (value, offset) in registers { self.ins().store(flags, value, dest, offset); } } @@ -798,55 +816,6 @@ impl<'a> FunctionBuilder<'a> { self.ins().call(libc_memmove, &[dest, source, size]); } - - /// Optimised memmove for small moves. - pub fn emit_small_memmove( - &mut self, - config: TargetFrontendConfig, - dest: Value, - src: Value, - size: u64, - dest_align: u8, - src_align: u8, - ) { - // Currently the result of guess work, not actual profiling. - const THRESHOLD: u64 = 4; - - let access_size = greatest_divisible_power_of_two(size); - assert!( - access_size.is_power_of_two(), - "`size` is not a power of two" - ); - assert!( - access_size >= u64::from(::core::cmp::min(src_align, dest_align)), - "`size` is smaller than `dest` and `src`'s alignment value." - ); - let load_and_store_amount = size / access_size; - - if load_and_store_amount > THRESHOLD { - let size_value = self.ins().iconst(config.pointer_type(), size as i64); - self.call_memmove(config, dest, src, size_value); - return; - } - - let mut flags = MemFlags::new(); - flags.set_aligned(); - - // Load all of the memory first in case `dest` overlaps. - let registers: Vec<_> = (0..load_and_store_amount) - .map(|i| { - let offset = (access_size * i) as i32; - ( - self.ins().load(config.pointer_type(), flags, src, offset), - offset, - ) - }) - .collect(); - - for (value, offset) in registers { - self.ins().store(flags, value, dest, offset); - } - } } fn greatest_divisible_power_of_two(size: u64) -> u64 { @@ -1104,7 +1073,7 @@ block0: let src = builder.use_var(x); let dest = builder.use_var(y); let size = 8; - builder.emit_small_memcpy(target.frontend_config(), dest, src, size, 8, 8); + builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true); builder.ins().return_(&[dest]); builder.seal_all_blocks(); @@ -1161,7 +1130,7 @@ block0: let src = builder.use_var(x); let dest = builder.use_var(y); let size = 8192; - builder.emit_small_memcpy(target.frontend_config(), dest, src, size, 8, 8); + builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true); builder.ins().return_(&[dest]); builder.seal_all_blocks();