Merge emit_small_memcpy and emit_small_memmove (#1301)
* Merge emit_small_memcpy and emit_small_memmove * Fix typo
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
//! A frontend for building Cranelift IR from other languages.
|
||||
use crate::ssa::{SSABlock, SSABuilder, SideEffects};
|
||||
use crate::variable::Variable;
|
||||
use alloc::vec::Vec;
|
||||
use cranelift_codegen::cursor::{Cursor, FuncCursor};
|
||||
use cranelift_codegen::entity::{EntitySet, SecondaryMap};
|
||||
use cranelift_codegen::ir;
|
||||
@@ -626,8 +625,15 @@ impl<'a> FunctionBuilder<'a> {
|
||||
self.ins().call(libc_memcpy, &[dest, src, size]);
|
||||
}
|
||||
|
||||
/// Optimised memcpy for small copies.
|
||||
pub fn emit_small_memcpy(
|
||||
/// Optimised memcpy or memmove for small copies.
|
||||
///
|
||||
/// # Codegen safety
|
||||
///
|
||||
/// The following properties must hold to prevent UB:
|
||||
///
|
||||
/// * `src_align` and `dest_align` are an upper-bound on the alignment of `src` respectively `dest`.
|
||||
/// * If `non_overlapping` is true, then this must be correct.
|
||||
pub fn emit_small_memory_copy(
|
||||
&mut self,
|
||||
config: TargetFrontendConfig,
|
||||
dest: Value,
|
||||
@@ -635,6 +641,7 @@ impl<'a> FunctionBuilder<'a> {
|
||||
size: u64,
|
||||
dest_align: u8,
|
||||
src_align: u8,
|
||||
non_overlapping: bool,
|
||||
) {
|
||||
// Currently the result of guess work, not actual profiling.
|
||||
const THRESHOLD: u64 = 4;
|
||||
@@ -663,16 +670,27 @@ impl<'a> FunctionBuilder<'a> {
|
||||
|
||||
if load_and_store_amount > THRESHOLD {
|
||||
let size_value = self.ins().iconst(config.pointer_type(), size as i64);
|
||||
self.call_memcpy(config, dest, src, size_value);
|
||||
if non_overlapping {
|
||||
self.call_memcpy(config, dest, src, size_value);
|
||||
} else {
|
||||
self.call_memmove(config, dest, src, size_value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let mut flags = MemFlags::new();
|
||||
flags.set_aligned();
|
||||
|
||||
for i in 0..load_and_store_amount {
|
||||
let offset = (access_size * i) as i32;
|
||||
let value = self.ins().load(int_type, flags, src, offset);
|
||||
// Load all of the memory first. This is necessary in case `dest` overlaps.
|
||||
// It can also improve performance a bit.
|
||||
let registers: smallvec::SmallVec<[_; THRESHOLD as usize]> = (0..load_and_store_amount)
|
||||
.map(|i| {
|
||||
let offset = (access_size * i) as i32;
|
||||
(self.ins().load(int_type, flags, src, offset), offset)
|
||||
})
|
||||
.collect();
|
||||
|
||||
for (value, offset) in registers {
|
||||
self.ins().store(flags, value, dest, offset);
|
||||
}
|
||||
}
|
||||
@@ -798,55 +816,6 @@ impl<'a> FunctionBuilder<'a> {
|
||||
|
||||
self.ins().call(libc_memmove, &[dest, source, size]);
|
||||
}
|
||||
|
||||
/// Optimised memmove for small moves.
|
||||
pub fn emit_small_memmove(
|
||||
&mut self,
|
||||
config: TargetFrontendConfig,
|
||||
dest: Value,
|
||||
src: Value,
|
||||
size: u64,
|
||||
dest_align: u8,
|
||||
src_align: u8,
|
||||
) {
|
||||
// Currently the result of guess work, not actual profiling.
|
||||
const THRESHOLD: u64 = 4;
|
||||
|
||||
let access_size = greatest_divisible_power_of_two(size);
|
||||
assert!(
|
||||
access_size.is_power_of_two(),
|
||||
"`size` is not a power of two"
|
||||
);
|
||||
assert!(
|
||||
access_size >= u64::from(::core::cmp::min(src_align, dest_align)),
|
||||
"`size` is smaller than `dest` and `src`'s alignment value."
|
||||
);
|
||||
let load_and_store_amount = size / access_size;
|
||||
|
||||
if load_and_store_amount > THRESHOLD {
|
||||
let size_value = self.ins().iconst(config.pointer_type(), size as i64);
|
||||
self.call_memmove(config, dest, src, size_value);
|
||||
return;
|
||||
}
|
||||
|
||||
let mut flags = MemFlags::new();
|
||||
flags.set_aligned();
|
||||
|
||||
// Load all of the memory first in case `dest` overlaps.
|
||||
let registers: Vec<_> = (0..load_and_store_amount)
|
||||
.map(|i| {
|
||||
let offset = (access_size * i) as i32;
|
||||
(
|
||||
self.ins().load(config.pointer_type(), flags, src, offset),
|
||||
offset,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
for (value, offset) in registers {
|
||||
self.ins().store(flags, value, dest, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn greatest_divisible_power_of_two(size: u64) -> u64 {
|
||||
@@ -1104,7 +1073,7 @@ block0:
|
||||
let src = builder.use_var(x);
|
||||
let dest = builder.use_var(y);
|
||||
let size = 8;
|
||||
builder.emit_small_memcpy(target.frontend_config(), dest, src, size, 8, 8);
|
||||
builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true);
|
||||
builder.ins().return_(&[dest]);
|
||||
|
||||
builder.seal_all_blocks();
|
||||
@@ -1161,7 +1130,7 @@ block0:
|
||||
let src = builder.use_var(x);
|
||||
let dest = builder.use_var(y);
|
||||
let size = 8192;
|
||||
builder.emit_small_memcpy(target.frontend_config(), dest, src, size, 8, 8);
|
||||
builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true);
|
||||
builder.ins().return_(&[dest]);
|
||||
|
||||
builder.seal_all_blocks();
|
||||
|
||||
Reference in New Issue
Block a user