Partial hashing of module for faster caching (#221)
* Simple module compilation cache * Fix base64 encoding bug * Use warn! everywhere in cache system * Remove unused import * Temporary workaround for long path on Windows * Remove unused import for non-windows builds * Add command line argument to enable cache system + apply minor review feedback * Initial implementation of partial module hashing * Proper module hashing for the cache * Use newer version of cranelift
This commit is contained in:
@@ -1,13 +1,17 @@
|
||||
use crate::address_map::ModuleAddressMap;
|
||||
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
|
||||
use crate::module::Module;
|
||||
use cranelift_codegen::ir;
|
||||
use cranelift_codegen::isa;
|
||||
use crate::module_environ::FunctionBodyData;
|
||||
use core::hash::Hasher;
|
||||
use cranelift_codegen::{ir, isa};
|
||||
use cranelift_entity::PrimaryMap;
|
||||
use cranelift_wasm::DefinedFuncIndex;
|
||||
use directories::ProjectDirs;
|
||||
use lazy_static::lazy_static;
|
||||
use log::{debug, warn};
|
||||
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
|
||||
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::ffi::OsString;
|
||||
use std::fmt;
|
||||
use std::fs;
|
||||
@@ -102,36 +106,38 @@ pub struct ModuleCacheData {
|
||||
|
||||
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
|
||||
|
||||
struct Sha256Hasher(Sha256);
|
||||
|
||||
impl ModuleCacheEntry {
|
||||
pub fn new(
|
||||
pub fn new<'data>(
|
||||
module: &Module,
|
||||
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||
isa: &dyn isa::TargetIsa,
|
||||
compiler_name: &str,
|
||||
generate_debug_info: bool,
|
||||
) -> Self {
|
||||
let mod_cache_path = if conf::cache_enabled() {
|
||||
CACHE_DIR.clone().and_then(|p| {
|
||||
module.hash.map(|hash| {
|
||||
let compiler_dir = if cfg!(debug_assertions) {
|
||||
format!(
|
||||
"{comp_name}-{comp_ver}-{comp_mtime}",
|
||||
comp_name = compiler_name,
|
||||
comp_ver = env!("GIT_REV"),
|
||||
comp_mtime = *SELF_MTIME,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{comp_name}-{comp_ver}",
|
||||
comp_name = compiler_name,
|
||||
comp_ver = env!("GIT_REV"),
|
||||
)
|
||||
};
|
||||
p.join(isa.name()).join(compiler_dir).join(format!(
|
||||
"mod-{mod_hash}{mod_dbg}",
|
||||
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
|
||||
mod_dbg = if generate_debug_info { ".d" } else { "" },
|
||||
))
|
||||
})
|
||||
CACHE_DIR.clone().map(|p| {
|
||||
let hash = Sha256Hasher::digest(module, function_body_inputs);
|
||||
let compiler_dir = if cfg!(debug_assertions) {
|
||||
format!(
|
||||
"{comp_name}-{comp_ver}-{comp_mtime}",
|
||||
comp_name = compiler_name,
|
||||
comp_ver = env!("GIT_REV"),
|
||||
comp_mtime = *SELF_MTIME,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{comp_name}-{comp_ver}",
|
||||
comp_name = compiler_name,
|
||||
comp_ver = env!("GIT_REV"),
|
||||
)
|
||||
};
|
||||
p.join(isa.name()).join(compiler_dir).join(format!(
|
||||
"mod-{mod_hash}{mod_dbg}",
|
||||
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
|
||||
mod_dbg = if generate_debug_info { ".d" } else { "" },
|
||||
))
|
||||
})
|
||||
} else {
|
||||
None
|
||||
@@ -227,6 +233,27 @@ impl ModuleCacheData {
|
||||
}
|
||||
}
|
||||
|
||||
// Inherent helpers for the SHA-256 backed hasher used to derive cache keys.
impl Sha256Hasher {
|
||||
/// Computes the 32-byte SHA-256 digest used to identify a module in the cache.
///
/// A fresh `Sha256` state is wrapped in `Sha256Hasher`, the module feeds its
/// cache-relevant parts into it through `Module::hash_for_cache`, and the
/// finalized digest is returned as a fixed-size byte array.
pub fn digest<'data>(
|
||||
module: &Module,
|
||||
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||
) -> [u8; 32] {
|
||||
// Start from an empty SHA-256 state wrapped in the `Hasher` adapter.
let mut hasher = Self(Sha256::new());
|
||||
// The module decides which of its parts are written into the hasher.
module.hash_for_cache(function_body_inputs, &mut hasher);
|
||||
// Finalize the wrapped SHA-256 state and convert the output into `[u8; 32]`.
hasher.0.result().into()
|
||||
}
|
||||
}
|
||||
|
||||
// `Hasher` adapter so that `Hash` implementations can write into a SHA-256 state.
impl Hasher for Sha256Hasher {
|
||||
/// Always panics: this adapter does not produce a `u64` hash.
/// The full digest is read from the wrapped `Sha256` instead (see `Sha256Hasher::digest`).
fn finish(&self) -> u64 {
|
||||
panic!("Sha256Hasher doesn't support finish!");
|
||||
}
|
||||
|
||||
/// Forwards the bytes written by `Hash` implementations to the wrapped SHA-256 state.
fn write(&mut self, bytes: &[u8]) {
|
||||
self.0.input(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
//-////////////////////////////////////////////////////////////////////
|
||||
// Serialization and deserialization of type containing SecondaryMap //
|
||||
//-////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -124,7 +124,13 @@ impl crate::compilation::Compiler for Cranelift {
|
||||
isa: &dyn isa::TargetIsa,
|
||||
generate_debug_info: bool,
|
||||
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
|
||||
let cache_entry = ModuleCacheEntry::new(module, isa, "cranelift", generate_debug_info);
|
||||
let cache_entry = ModuleCacheEntry::new(
|
||||
module,
|
||||
&function_body_inputs,
|
||||
isa,
|
||||
"cranelift",
|
||||
generate_debug_info,
|
||||
);
|
||||
|
||||
let data = match cache_entry.get_data() {
|
||||
Some(data) => data,
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! Data structures for representing decoded wasm modules.
|
||||
|
||||
use crate::module_environ::FunctionBodyData;
|
||||
use crate::tunables::Tunables;
|
||||
use core::hash::{Hash, Hasher};
|
||||
use cranelift_codegen::ir;
|
||||
use cranelift_entity::{EntityRef, PrimaryMap};
|
||||
use cranelift_wasm::{
|
||||
@@ -13,7 +15,7 @@ use std::string::String;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// A WebAssembly table initializer.
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Hash)]
|
||||
pub struct TableElements {
|
||||
/// The index of a table to initialize.
|
||||
pub table_index: TableIndex,
|
||||
@@ -26,7 +28,7 @@ pub struct TableElements {
|
||||
}
|
||||
|
||||
/// An entity to export.
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Export {
|
||||
/// Function export.
|
||||
Function(FuncIndex),
|
||||
@@ -39,7 +41,7 @@ pub enum Export {
|
||||
}
|
||||
|
||||
/// Implementation styles for WebAssembly linear memory.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub enum MemoryStyle {
|
||||
/// The actual memory can be resized and moved.
|
||||
Dynamic,
|
||||
@@ -77,7 +79,7 @@ impl MemoryStyle {
|
||||
|
||||
/// A WebAssembly linear memory description along with our chosen style for
|
||||
/// implementing it.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct MemoryPlan {
|
||||
/// The WebAssembly linear memory description.
|
||||
pub memory: Memory,
|
||||
@@ -100,7 +102,7 @@ impl MemoryPlan {
|
||||
}
|
||||
|
||||
/// Implementation styles for WebAssembly tables.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub enum TableStyle {
|
||||
/// Signatures are stored in the table and checked in the caller.
|
||||
CallerChecksSignature,
|
||||
@@ -115,7 +117,7 @@ impl TableStyle {
|
||||
|
||||
/// A WebAssembly table description along with our chosen style for
|
||||
/// implementing it.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct TablePlan {
|
||||
/// The WebAssembly table description.
|
||||
pub table: cranelift_wasm::Table,
|
||||
@@ -133,6 +135,7 @@ impl TablePlan {
|
||||
|
||||
/// A translated WebAssembly module, excluding the function bodies and
|
||||
/// memory initializers.
|
||||
// WARNING: when modifying, make sure that `hash_for_cache` is still valid!
|
||||
#[derive(Debug)]
|
||||
pub struct Module {
|
||||
/// Unprocessed signatures exactly as provided by `declare_signature()`.
|
||||
@@ -170,10 +173,6 @@ pub struct Module {
|
||||
|
||||
/// WebAssembly table initializers.
|
||||
pub table_elements: Vec<TableElements>,
|
||||
|
||||
/// Hash of the source wasm code if this module is not synthesized.
|
||||
/// TODO: this is temporary workaround. Will be replaced with derive macro.
|
||||
pub hash: Option<[u8; 32]>,
|
||||
}
|
||||
|
||||
impl Module {
|
||||
@@ -192,7 +191,6 @@ impl Module {
|
||||
exports: IndexMap::new(),
|
||||
start_func: None,
|
||||
table_elements: Vec::new(),
|
||||
hash: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -283,4 +281,29 @@ impl Module {
|
||||
pub fn is_imported_global(&self, index: GlobalIndex) -> bool {
|
||||
index.index() < self.imported_globals.len()
|
||||
}
|
||||
|
||||
/// Computes hash of the module for the purpose of caching.
///
/// Feeds the module's signatures, functions, table plans, memory plans,
/// globals, export values (in sorted order) and finally
/// `function_body_inputs` into `state`. Nothing is returned; the caller
/// reads the result from the hasher it passed in.
|
||||
pub fn hash_for_cache<'data, H>(
|
||||
&self,
|
||||
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
|
||||
state: &mut H,
|
||||
) where
|
||||
H: Hasher,
|
||||
{
|
||||
// Names (strings), the start function and data initializers (for both
|
||||
// memory and tables) are intentionally left out of the hash.
|
||||
self.signatures.hash(state);
|
||||
self.functions.hash(state);
|
||||
self.table_plans.hash(state);
|
||||
self.memory_plans.hash(state);
|
||||
self.globals.hash(state);
|
||||
// IndexMap (self.exports) iterates over values in insertion order,
|
||||
// so sort the values to make the hash independent of that order.
|
||||
let mut exports = self.exports.values().collect::<Vec<_>>();
|
||||
exports.sort();
|
||||
// Only the export values are hashed; the export names (map keys) are not.
for val in exports {
|
||||
val.hash(state);
|
||||
}
|
||||
// Function bodies are hashed last.
function_body_inputs.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,12 +10,12 @@ use cranelift_wasm::{
|
||||
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
|
||||
SignatureIndex, Table, TableIndex, WasmResult,
|
||||
};
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::boxed::Box;
|
||||
use std::string::String;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Contains function data: byte code and its offset in the module.
|
||||
#[derive(Hash)]
|
||||
pub struct FunctionBodyData<'a> {
|
||||
/// Body byte code.
|
||||
pub data: &'a [u8],
|
||||
@@ -80,11 +80,6 @@ impl<'data> ModuleEnvironment<'data> {
|
||||
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
|
||||
translate_module(data, &mut self)?;
|
||||
|
||||
// TODO: this is temporary workaround and will be replaced with derive macro.
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.input(data);
|
||||
self.result.module.hash = Some(hasher.result().into());
|
||||
|
||||
Ok(self.result)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user