Provide optimised codegen for small libc calls
This commit is contained in:
@@ -6,7 +6,8 @@ use cranelift_codegen::ir::function::DisplayFunction;
|
|||||||
use cranelift_codegen::ir::{
|
use cranelift_codegen::ir::{
|
||||||
types, AbiParam, DataFlowGraph, Ebb, ExtFuncData, ExternalName, FuncRef, Function, GlobalValue,
|
types, AbiParam, DataFlowGraph, Ebb, ExtFuncData, ExternalName, FuncRef, Function, GlobalValue,
|
||||||
GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase, InstructionData,
|
GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase, InstructionData,
|
||||||
JumpTable, JumpTableData, LibCall, SigRef, Signature, StackSlot, StackSlotData, Type, Value,
|
JumpTable, JumpTableData, LibCall, MemFlags, SigRef, Signature, StackSlot, StackSlotData, Type,
|
||||||
|
Value,
|
||||||
};
|
};
|
||||||
use cranelift_codegen::isa::TargetIsa;
|
use cranelift_codegen::isa::TargetIsa;
|
||||||
use cranelift_codegen::packed_option::PackedOption;
|
use cranelift_codegen::packed_option::PackedOption;
|
||||||
@@ -567,10 +568,51 @@ impl<'a> FunctionBuilder<'a> {
|
|||||||
self.ins().call(libc_memcpy, &[dest, src, size]);
|
self.ins().call(libc_memcpy, &[dest, src, size]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Optimised memcpy for small copys.
|
||||||
|
pub fn emit_small_memcpy(
|
||||||
|
&mut self,
|
||||||
|
isa: &TargetIsa,
|
||||||
|
dest: Value,
|
||||||
|
src: Value,
|
||||||
|
size: u64,
|
||||||
|
dest_align: u8,
|
||||||
|
src_align: u8,
|
||||||
|
) {
|
||||||
|
// Currently the result of guess work, not actual profiling.
|
||||||
|
const THRESHOLD: u64 = 4;
|
||||||
|
|
||||||
|
let access_size = greatest_divisible_power_of_two(size);
|
||||||
|
assert!(
|
||||||
|
access_size.is_power_of_two(),
|
||||||
|
"`size` is not a power of two"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
access_size >= ::std::cmp::min(src_align, dest_align) as u64,
|
||||||
|
"`size` is smaller than `dest` and `src`'s alignment value."
|
||||||
|
);
|
||||||
|
let load_and_store_amount = size / access_size;
|
||||||
|
|
||||||
|
if load_and_store_amount > THRESHOLD {
|
||||||
|
let size_value = self.ins().iconst(isa.pointer_type(), size as i64);
|
||||||
|
self.call_memcpy(isa, dest, src, size_value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut flags = MemFlags::new();
|
||||||
|
flags.set_aligned();
|
||||||
|
|
||||||
|
for i in 0..load_and_store_amount {
|
||||||
|
let offset = (access_size * i) as i32;
|
||||||
|
let int_type = Type::int(access_size as u16).unwrap();
|
||||||
|
let value = self.ins().load(int_type, flags, src, offset);
|
||||||
|
self.ins().store(flags, value, dest, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Calls libc.memset
|
/// Calls libc.memset
|
||||||
///
|
///
|
||||||
/// Writes `len` bytes of value `ch` to memory starting at `buffer`.
|
/// Writes `size` bytes of value `ch` to memory starting at `buffer`.
|
||||||
pub fn call_memset(&mut self, isa: &TargetIsa, buffer: Value, ch: Value, len: Value) {
|
pub fn call_memset(&mut self, isa: &TargetIsa, buffer: Value, ch: Value, size: Value) {
|
||||||
let pointer_type = isa.pointer_type();
|
let pointer_type = isa.pointer_type();
|
||||||
let signature = {
|
let signature = {
|
||||||
let mut s = Signature::new(isa.flags().call_conv());
|
let mut s = Signature::new(isa.flags().call_conv());
|
||||||
@@ -587,14 +629,68 @@ impl<'a> FunctionBuilder<'a> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
let ch = self.ins().uextend(types::I32, ch);
|
let ch = self.ins().uextend(types::I32, ch);
|
||||||
self.ins().call(libc_memset, &[buffer, ch, len]);
|
self.ins().call(libc_memset, &[buffer, ch, size]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calls libc.memset
|
||||||
|
///
|
||||||
|
/// Writes `size` bytes of value `ch` to memory starting at `buffer`.
|
||||||
|
pub fn emit_small_memset(
|
||||||
|
&mut self,
|
||||||
|
isa: &TargetIsa,
|
||||||
|
buffer: Value,
|
||||||
|
ch: u32,
|
||||||
|
size: u64,
|
||||||
|
buffer_align: u8,
|
||||||
|
) {
|
||||||
|
// Currently the result of guess work, not actual profiling.
|
||||||
|
const THRESHOLD: u64 = 4;
|
||||||
|
|
||||||
|
let access_size = greatest_divisible_power_of_two(size);
|
||||||
|
assert!(
|
||||||
|
access_size.is_power_of_two(),
|
||||||
|
"`size` is not a power of two"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
access_size >= buffer_align as u64,
|
||||||
|
"`size` is smaller than `dest` and `src`'s alignment value."
|
||||||
|
);
|
||||||
|
let load_and_store_amount = size / access_size;
|
||||||
|
|
||||||
|
if load_and_store_amount > THRESHOLD {
|
||||||
|
let ch = self.ins().iconst(types::I32, ch as i64);
|
||||||
|
let size = self.ins().iconst(isa.pointer_type(), size as i64);
|
||||||
|
self.call_memset(isa, buffer, ch, size);
|
||||||
|
} else {
|
||||||
|
let mut flags = MemFlags::new();
|
||||||
|
flags.set_aligned();
|
||||||
|
|
||||||
|
let ch = ch as u64;
|
||||||
|
let int_type = Type::int(access_size as u16).unwrap();
|
||||||
|
let raw_value = if int_type == types::I64 {
|
||||||
|
(ch << 32) | (ch << 16) | (ch << 8) | ch
|
||||||
|
} else if int_type == types::I32 {
|
||||||
|
(ch << 16) | (ch << 8) | ch
|
||||||
|
} else if int_type == types::I16 {
|
||||||
|
(ch << 8) | ch
|
||||||
|
} else {
|
||||||
|
assert_eq!(int_type, types::I8);
|
||||||
|
ch
|
||||||
|
};
|
||||||
|
|
||||||
|
let value = self.ins().iconst(int_type, raw_value as i64);
|
||||||
|
for i in 0..load_and_store_amount {
|
||||||
|
let offset = (access_size * i) as i32;
|
||||||
|
self.ins().store(flags, value, buffer, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calls libc.memmove
|
/// Calls libc.memmove
|
||||||
///
|
///
|
||||||
/// Copies `len` bytes from memory starting at `source` to memory starting
|
/// Copies `size` bytes from memory starting at `source` to memory starting
|
||||||
/// at `dest`. `source` is always read before writing to `dest`.
|
/// at `dest`. `source` is always read before writing to `dest`.
|
||||||
pub fn call_memmove(&mut self, isa: &TargetIsa, dest: Value, source: Value, num: Value) {
|
pub fn call_memmove(&mut self, isa: &TargetIsa, dest: Value, source: Value, size: Value) {
|
||||||
let pointer_type = isa.pointer_type();
|
let pointer_type = isa.pointer_type();
|
||||||
let signature = {
|
let signature = {
|
||||||
let mut s = Signature::new(isa.flags().call_conv());
|
let mut s = Signature::new(isa.flags().call_conv());
|
||||||
@@ -610,8 +706,57 @@ impl<'a> FunctionBuilder<'a> {
|
|||||||
colocated: false,
|
colocated: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
self.ins().call(libc_memmove, &[dest, source, num]);
|
self.ins().call(libc_memmove, &[dest, source, size]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Optimised memmove for small moves.
|
||||||
|
pub fn emit_small_memmove(
|
||||||
|
&mut self,
|
||||||
|
isa: &TargetIsa,
|
||||||
|
dest: Value,
|
||||||
|
src: Value,
|
||||||
|
size: u64,
|
||||||
|
dest_align: u8,
|
||||||
|
src_align: u8,
|
||||||
|
) {
|
||||||
|
// Currently the result of guess work, not actual profiling.
|
||||||
|
const THRESHOLD: u64 = 4;
|
||||||
|
|
||||||
|
let access_size = greatest_divisible_power_of_two(size);
|
||||||
|
assert!(
|
||||||
|
access_size.is_power_of_two(),
|
||||||
|
"`size` is not a power of two"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
access_size >= ::std::cmp::min(src_align, dest_align) as u64,
|
||||||
|
"`size` is smaller than `dest` and `src`'s alignment value."
|
||||||
|
);
|
||||||
|
let load_and_store_amount = size / access_size;
|
||||||
|
|
||||||
|
if load_and_store_amount > THRESHOLD {
|
||||||
|
let size_value = self.ins().iconst(isa.pointer_type(), size as i64);
|
||||||
|
self.call_memmove(isa, dest, src, size_value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut flags = MemFlags::new();
|
||||||
|
flags.set_aligned();
|
||||||
|
|
||||||
|
// Load all of the memory first in case `dest` overlaps.
|
||||||
|
let registers: Vec<_> = (0..load_and_store_amount)
|
||||||
|
.map(|i| {
|
||||||
|
let offset = (access_size * i) as i32;
|
||||||
|
(self.ins().load(types::I8, flags, src, offset), offset)
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
for (value, offset) in registers {
|
||||||
|
self.ins().store(flags, value, dest, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the largest power of two that evenly divides `size`.
///
/// This is the value of the lowest set bit of `size`. Zero has no set bits, so `0` is
/// returned when `size` is `0`.
fn greatest_divisible_power_of_two(size: u64) -> u64 {
    // `x & -x` isolates the lowest set bit. The negation is done with unsigned wrapping
    // arithmetic so that an input with only the top bit set does not overflow, as a
    // signed negation of `i64::MIN` would.
    size & size.wrapping_neg()
}
|
||||||
|
|
||||||
// Helper functions
|
// Helper functions
|
||||||
@@ -648,6 +793,7 @@ impl<'a> FunctionBuilder<'a> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::greatest_divisible_power_of_two;
|
||||||
use cranelift_codegen::entity::EntityRef;
|
use cranelift_codegen::entity::EntityRef;
|
||||||
use cranelift_codegen::ir::types::*;
|
use cranelift_codegen::ir::types::*;
|
||||||
use cranelift_codegen::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
use cranelift_codegen::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
||||||
@@ -821,4 +967,12 @@ ebb0:
|
|||||||
"
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks `greatest_divisible_power_of_two` against a table of `(size, expected)` pairs:
/// one exact power of two, two multiples of a power of two, and one odd number.
#[test]
fn test_greatest_divisible_power_of_two() {
    let cases: [(u64, u64); 4] = [(64, 64), (48, 16), (24, 8), (25, 1)];
    for &(size, expected) in cases.iter() {
        assert_eq!(expected, greatest_divisible_power_of_two(size));
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user