Partial hashing of module for faster caching (#221)
* Simple module compilation cache
* Fix base64 encoding bug
* Use warn! everywhere in cache system
* Remove unused import
* Temporary workaround for long path on Windows
* Remove unused import for non-windows builds
* Add command line argument to enable cache system + apply minor review feedback
* Initial implementation of partial module hashing
* Proper module hashing for the cache
* Use newer version of cranelift
This commit is contained in:
@@ -1,13 +1,17 @@
|
|||||||
use crate::address_map::ModuleAddressMap;
|
use crate::address_map::ModuleAddressMap;
|
||||||
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
|
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
|
||||||
use crate::module::Module;
|
use crate::module::Module;
|
||||||
use cranelift_codegen::ir;
|
use crate::module_environ::FunctionBodyData;
|
||||||
use cranelift_codegen::isa;
|
use core::hash::Hasher;
|
||||||
|
use cranelift_codegen::{ir, isa};
|
||||||
|
use cranelift_entity::PrimaryMap;
|
||||||
|
use cranelift_wasm::DefinedFuncIndex;
|
||||||
use directories::ProjectDirs;
|
use directories::ProjectDirs;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use log::{debug, warn};
|
use log::{debug, warn};
|
||||||
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
|
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
|
||||||
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
|
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
@@ -102,36 +106,38 @@ pub struct ModuleCacheData {
|
|||||||
|
|
||||||
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
|
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
|
||||||
|
|
||||||
|
struct Sha256Hasher(Sha256);
|
||||||
|
|
||||||
impl ModuleCacheEntry {
|
impl ModuleCacheEntry {
|
||||||
pub fn new(
|
pub fn new<'data>(
|
||||||
module: &Module,
|
module: &Module,
|
||||||
|
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||||
isa: &dyn isa::TargetIsa,
|
isa: &dyn isa::TargetIsa,
|
||||||
compiler_name: &str,
|
compiler_name: &str,
|
||||||
generate_debug_info: bool,
|
generate_debug_info: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let mod_cache_path = if conf::cache_enabled() {
|
let mod_cache_path = if conf::cache_enabled() {
|
||||||
CACHE_DIR.clone().and_then(|p| {
|
CACHE_DIR.clone().map(|p| {
|
||||||
module.hash.map(|hash| {
|
let hash = Sha256Hasher::digest(module, function_body_inputs);
|
||||||
let compiler_dir = if cfg!(debug_assertions) {
|
let compiler_dir = if cfg!(debug_assertions) {
|
||||||
format!(
|
format!(
|
||||||
"{comp_name}-{comp_ver}-{comp_mtime}",
|
"{comp_name}-{comp_ver}-{comp_mtime}",
|
||||||
comp_name = compiler_name,
|
comp_name = compiler_name,
|
||||||
comp_ver = env!("GIT_REV"),
|
comp_ver = env!("GIT_REV"),
|
||||||
comp_mtime = *SELF_MTIME,
|
comp_mtime = *SELF_MTIME,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
format!(
|
format!(
|
||||||
"{comp_name}-{comp_ver}",
|
"{comp_name}-{comp_ver}",
|
||||||
comp_name = compiler_name,
|
comp_name = compiler_name,
|
||||||
comp_ver = env!("GIT_REV"),
|
comp_ver = env!("GIT_REV"),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
p.join(isa.name()).join(compiler_dir).join(format!(
|
p.join(isa.name()).join(compiler_dir).join(format!(
|
||||||
"mod-{mod_hash}{mod_dbg}",
|
"mod-{mod_hash}{mod_dbg}",
|
||||||
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
|
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
|
||||||
mod_dbg = if generate_debug_info { ".d" } else { "" },
|
mod_dbg = if generate_debug_info { ".d" } else { "" },
|
||||||
))
|
))
|
||||||
})
|
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
@@ -227,6 +233,27 @@ impl ModuleCacheData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Sha256Hasher {
|
||||||
|
pub fn digest<'data>(
|
||||||
|
module: &Module,
|
||||||
|
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||||
|
) -> [u8; 32] {
|
||||||
|
let mut hasher = Self(Sha256::new());
|
||||||
|
module.hash_for_cache(function_body_inputs, &mut hasher);
|
||||||
|
hasher.0.result().into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Hasher for Sha256Hasher {
|
||||||
|
fn finish(&self) -> u64 {
|
||||||
|
panic!("Sha256Hasher doesn't support finish!");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write(&mut self, bytes: &[u8]) {
|
||||||
|
self.0.input(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//-////////////////////////////////////////////////////////////////////
|
//-////////////////////////////////////////////////////////////////////
|
||||||
// Serialization and deserialization of type containing SecondaryMap //
|
// Serialization and deserialization of type containing SecondaryMap //
|
||||||
//-////////////////////////////////////////////////////////////////////
|
//-////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -124,7 +124,13 @@ impl crate::compilation::Compiler for Cranelift {
|
|||||||
isa: &dyn isa::TargetIsa,
|
isa: &dyn isa::TargetIsa,
|
||||||
generate_debug_info: bool,
|
generate_debug_info: bool,
|
||||||
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
|
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
|
||||||
let cache_entry = ModuleCacheEntry::new(module, isa, "cranelift", generate_debug_info);
|
let cache_entry = ModuleCacheEntry::new(
|
||||||
|
module,
|
||||||
|
&function_body_inputs,
|
||||||
|
isa,
|
||||||
|
"cranelift",
|
||||||
|
generate_debug_info,
|
||||||
|
);
|
||||||
|
|
||||||
let data = match cache_entry.get_data() {
|
let data = match cache_entry.get_data() {
|
||||||
Some(data) => data,
|
Some(data) => data,
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
//! Data structures for representing decoded wasm modules.
|
//! Data structures for representing decoded wasm modules.
|
||||||
|
|
||||||
|
use crate::module_environ::FunctionBodyData;
|
||||||
use crate::tunables::Tunables;
|
use crate::tunables::Tunables;
|
||||||
|
use core::hash::{Hash, Hasher};
|
||||||
use cranelift_codegen::ir;
|
use cranelift_codegen::ir;
|
||||||
use cranelift_entity::{EntityRef, PrimaryMap};
|
use cranelift_entity::{EntityRef, PrimaryMap};
|
||||||
use cranelift_wasm::{
|
use cranelift_wasm::{
|
||||||
@@ -13,7 +15,7 @@ use std::string::String;
|
|||||||
use std::vec::Vec;
|
use std::vec::Vec;
|
||||||
|
|
||||||
/// A WebAssembly table initializer.
|
/// A WebAssembly table initializer.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Hash)]
|
||||||
pub struct TableElements {
|
pub struct TableElements {
|
||||||
/// The index of a table to initialize.
|
/// The index of a table to initialize.
|
||||||
pub table_index: TableIndex,
|
pub table_index: TableIndex,
|
||||||
@@ -26,7 +28,7 @@ pub struct TableElements {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// An entity to export.
|
/// An entity to export.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub enum Export {
|
pub enum Export {
|
||||||
/// Function export.
|
/// Function export.
|
||||||
Function(FuncIndex),
|
Function(FuncIndex),
|
||||||
@@ -39,7 +41,7 @@ pub enum Export {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Implementation styles for WebAssembly linear memory.
|
/// Implementation styles for WebAssembly linear memory.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Hash)]
|
||||||
pub enum MemoryStyle {
|
pub enum MemoryStyle {
|
||||||
/// The actual memory can be resized and moved.
|
/// The actual memory can be resized and moved.
|
||||||
Dynamic,
|
Dynamic,
|
||||||
@@ -77,7 +79,7 @@ impl MemoryStyle {
|
|||||||
|
|
||||||
/// A WebAssembly linear memory description along with our chosen style for
|
/// A WebAssembly linear memory description along with our chosen style for
|
||||||
/// implementing it.
|
/// implementing it.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Hash)]
|
||||||
pub struct MemoryPlan {
|
pub struct MemoryPlan {
|
||||||
/// The WebAssembly linear memory description.
|
/// The WebAssembly linear memory description.
|
||||||
pub memory: Memory,
|
pub memory: Memory,
|
||||||
@@ -100,7 +102,7 @@ impl MemoryPlan {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Implementation styles for WebAssembly tables.
|
/// Implementation styles for WebAssembly tables.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Hash)]
|
||||||
pub enum TableStyle {
|
pub enum TableStyle {
|
||||||
/// Signatures are stored in the table and checked in the caller.
|
/// Signatures are stored in the table and checked in the caller.
|
||||||
CallerChecksSignature,
|
CallerChecksSignature,
|
||||||
@@ -115,7 +117,7 @@ impl TableStyle {
|
|||||||
|
|
||||||
/// A WebAssembly table description along with our chosen style for
|
/// A WebAssembly table description along with our chosen style for
|
||||||
/// implementing it.
|
/// implementing it.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Hash)]
|
||||||
pub struct TablePlan {
|
pub struct TablePlan {
|
||||||
/// The WebAssembly table description.
|
/// The WebAssembly table description.
|
||||||
pub table: cranelift_wasm::Table,
|
pub table: cranelift_wasm::Table,
|
||||||
@@ -133,6 +135,7 @@ impl TablePlan {
|
|||||||
|
|
||||||
/// A translated WebAssembly module, excluding the function bodies and
|
/// A translated WebAssembly module, excluding the function bodies and
|
||||||
/// memory initializers.
|
/// memory initializers.
|
||||||
|
// WARNING: when modifying, make sure that `hash_for_cache` is still valid!
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Module {
|
pub struct Module {
|
||||||
/// Unprocessed signatures exactly as provided by `declare_signature()`.
|
/// Unprocessed signatures exactly as provided by `declare_signature()`.
|
||||||
@@ -170,10 +173,6 @@ pub struct Module {
|
|||||||
|
|
||||||
/// WebAssembly table initializers.
|
/// WebAssembly table initializers.
|
||||||
pub table_elements: Vec<TableElements>,
|
pub table_elements: Vec<TableElements>,
|
||||||
|
|
||||||
/// Hash of the source wasm code if this module is not synthesized.
|
|
||||||
/// TODO: this is temporary workaround. Will be replaced with derive macro.
|
|
||||||
pub hash: Option<[u8; 32]>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Module {
|
impl Module {
|
||||||
@@ -192,7 +191,6 @@ impl Module {
|
|||||||
exports: IndexMap::new(),
|
exports: IndexMap::new(),
|
||||||
start_func: None,
|
start_func: None,
|
||||||
table_elements: Vec::new(),
|
table_elements: Vec::new(),
|
||||||
hash: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -283,4 +281,29 @@ impl Module {
|
|||||||
pub fn is_imported_global(&self, index: GlobalIndex) -> bool {
|
pub fn is_imported_global(&self, index: GlobalIndex) -> bool {
|
||||||
index.index() < self.imported_globals.len()
|
index.index() < self.imported_globals.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes hash of the module for the purpose of caching.
|
||||||
|
pub fn hash_for_cache<'data, H>(
|
||||||
|
&self,
|
||||||
|
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||||
|
state: &mut H,
|
||||||
|
) where
|
||||||
|
H: Hasher,
|
||||||
|
{
|
||||||
|
// There's no need to hash names (strings), the start function,
|
||||||
|
// or data initializers (for both memories and tables).
|
||||||
|
self.signatures.hash(state);
|
||||||
|
self.functions.hash(state);
|
||||||
|
self.table_plans.hash(state);
|
||||||
|
self.memory_plans.hash(state);
|
||||||
|
self.globals.hash(state);
|
||||||
|
// IndexMap (self.exports) iterates over values in insertion order.
|
||||||
|
// Sort the values so the hash does not depend on that order.
|
||||||
|
let mut exports = self.exports.values().collect::<Vec<_>>();
|
||||||
|
exports.sort();
|
||||||
|
for val in exports {
|
||||||
|
val.hash(state);
|
||||||
|
}
|
||||||
|
function_body_inputs.hash(state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,12 +10,12 @@ use cranelift_wasm::{
|
|||||||
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
|
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
|
||||||
SignatureIndex, Table, TableIndex, WasmResult,
|
SignatureIndex, Table, TableIndex, WasmResult,
|
||||||
};
|
};
|
||||||
use sha2::{Digest, Sha256};
|
|
||||||
use std::boxed::Box;
|
use std::boxed::Box;
|
||||||
use std::string::String;
|
use std::string::String;
|
||||||
use std::vec::Vec;
|
use std::vec::Vec;
|
||||||
|
|
||||||
/// Contains function data: byte code and its offset in the module.
|
/// Contains function data: byte code and its offset in the module.
|
||||||
|
#[derive(Hash)]
|
||||||
pub struct FunctionBodyData<'a> {
|
pub struct FunctionBodyData<'a> {
|
||||||
/// Body byte code.
|
/// Body byte code.
|
||||||
pub data: &'a [u8],
|
pub data: &'a [u8],
|
||||||
@@ -80,11 +80,6 @@ impl<'data> ModuleEnvironment<'data> {
|
|||||||
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
|
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
|
||||||
translate_module(data, &mut self)?;
|
translate_module(data, &mut self)?;
|
||||||
|
|
||||||
// TODO: this is temporary workaround and will be replaced with derive macro.
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
hasher.input(data);
|
|
||||||
self.result.module.hash = Some(hasher.result().into());
|
|
||||||
|
|
||||||
Ok(self.result)
|
Ok(self.result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user