Partial hashing of module for faster caching (#221)

* Simple module compilation cache

* Fix base64 encoding bug

* Use warn! everywhere in cache system

* Remove unused import

* Temporary workaround for long path on Windows

* Remove unused import for non-windows builds

* Add command line argument to enable cache system + apply minor review feedback

* Initial implementation of partial module hashing

* Proper module hashing for the cache

* Use newer version of cranelift
This commit is contained in:
Artur Jamro
2019-08-06 17:19:26 -07:00
committed by Dan Gohman
parent 17d676ecbc
commit b10f8cf322
4 changed files with 94 additions and 43 deletions

View File

@@ -1,13 +1,17 @@
use crate::address_map::ModuleAddressMap;
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
use crate::module::Module;
use cranelift_codegen::ir;
use cranelift_codegen::isa;
use crate::module_environ::FunctionBodyData;
use core::hash::Hasher;
use cranelift_codegen::{ir, isa};
use cranelift_entity::PrimaryMap;
use cranelift_wasm::DefinedFuncIndex;
use directories::ProjectDirs;
use lazy_static::lazy_static;
use log::{debug, warn};
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
use sha2::{Digest, Sha256};
use std::ffi::OsString;
use std::fmt;
use std::fs;
@@ -102,36 +106,38 @@ pub struct ModuleCacheData {
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
/// Newtype around a `Sha256` state; it implements `Hasher` so that values
/// written through the `Hasher` interface are fed into the SHA-256 state.
struct Sha256Hasher(Sha256);
impl ModuleCacheEntry {
pub fn new(
pub fn new<'data>(
module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
isa: &dyn isa::TargetIsa,
compiler_name: &str,
generate_debug_info: bool,
) -> Self {
let mod_cache_path = if conf::cache_enabled() {
CACHE_DIR.clone().and_then(|p| {
module.hash.map(|hash| {
let compiler_dir = if cfg!(debug_assertions) {
format!(
"{comp_name}-{comp_ver}-{comp_mtime}",
comp_name = compiler_name,
comp_ver = env!("GIT_REV"),
comp_mtime = *SELF_MTIME,
)
} else {
format!(
"{comp_name}-{comp_ver}",
comp_name = compiler_name,
comp_ver = env!("GIT_REV"),
)
};
p.join(isa.name()).join(compiler_dir).join(format!(
"mod-{mod_hash}{mod_dbg}",
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
mod_dbg = if generate_debug_info { ".d" } else { "" },
))
})
CACHE_DIR.clone().map(|p| {
let hash = Sha256Hasher::digest(module, function_body_inputs);
let compiler_dir = if cfg!(debug_assertions) {
format!(
"{comp_name}-{comp_ver}-{comp_mtime}",
comp_name = compiler_name,
comp_ver = env!("GIT_REV"),
comp_mtime = *SELF_MTIME,
)
} else {
format!(
"{comp_name}-{comp_ver}",
comp_name = compiler_name,
comp_ver = env!("GIT_REV"),
)
};
p.join(isa.name()).join(compiler_dir).join(format!(
"mod-{mod_hash}{mod_dbg}",
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
mod_dbg = if generate_debug_info { ".d" } else { "" },
))
})
} else {
None
@@ -227,6 +233,27 @@ impl ModuleCacheData {
}
}
impl Sha256Hasher {
pub fn digest<'data>(
module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
) -> [u8; 32] {
let mut hasher = Self(Sha256::new());
module.hash_for_cache(function_body_inputs, &mut hasher);
hasher.0.result().into()
}
}
impl Hasher for Sha256Hasher {
fn finish(&self) -> u64 {
panic!("Sha256Hasher doesn't support finish!");
}
fn write(&mut self, bytes: &[u8]) {
self.0.input(bytes);
}
}
//-////////////////////////////////////////////////////////////////////
// Serialization and deserialization of type containing SecondaryMap //
//-////////////////////////////////////////////////////////////////////