Partial hashing of module for faster caching (#221)

* Simple module compilation cache

* Fix base64 encoding bug

* Use warn! everywhere in cache system

* Remove unused import

* Temporary workaround for long path on Windows

* Remove unused import for non-windows builds

* Add command line argument to enable cache system + apply minor review feedback

* Initial implementation of partial module hashing

* Proper module hashing for the cache

* Use newer version of cranelift
This commit is contained in:
Artur Jamro
2019-08-06 17:19:26 -07:00
committed by Dan Gohman
parent 17d676ecbc
commit b10f8cf322
4 changed files with 94 additions and 43 deletions

View File

@@ -1,13 +1,17 @@
use crate::address_map::ModuleAddressMap; use crate::address_map::ModuleAddressMap;
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations}; use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
use crate::module::Module; use crate::module::Module;
use cranelift_codegen::ir; use crate::module_environ::FunctionBodyData;
use cranelift_codegen::isa; use core::hash::Hasher;
use cranelift_codegen::{ir, isa};
use cranelift_entity::PrimaryMap;
use cranelift_wasm::DefinedFuncIndex;
use directories::ProjectDirs; use directories::ProjectDirs;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::{debug, warn}; use log::{debug, warn};
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor}; use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer}; use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
use sha2::{Digest, Sha256};
use std::ffi::OsString; use std::ffi::OsString;
use std::fmt; use std::fmt;
use std::fs; use std::fs;
@@ -102,36 +106,38 @@ pub struct ModuleCacheData {
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap); type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
struct Sha256Hasher(Sha256);
impl ModuleCacheEntry { impl ModuleCacheEntry {
pub fn new( pub fn new<'data>(
module: &Module, module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
isa: &dyn isa::TargetIsa, isa: &dyn isa::TargetIsa,
compiler_name: &str, compiler_name: &str,
generate_debug_info: bool, generate_debug_info: bool,
) -> Self { ) -> Self {
let mod_cache_path = if conf::cache_enabled() { let mod_cache_path = if conf::cache_enabled() {
CACHE_DIR.clone().and_then(|p| { CACHE_DIR.clone().map(|p| {
module.hash.map(|hash| { let hash = Sha256Hasher::digest(module, function_body_inputs);
let compiler_dir = if cfg!(debug_assertions) { let compiler_dir = if cfg!(debug_assertions) {
format!( format!(
"{comp_name}-{comp_ver}-{comp_mtime}", "{comp_name}-{comp_ver}-{comp_mtime}",
comp_name = compiler_name, comp_name = compiler_name,
comp_ver = env!("GIT_REV"), comp_ver = env!("GIT_REV"),
comp_mtime = *SELF_MTIME, comp_mtime = *SELF_MTIME,
) )
} else { } else {
format!( format!(
"{comp_name}-{comp_ver}", "{comp_name}-{comp_ver}",
comp_name = compiler_name, comp_name = compiler_name,
comp_ver = env!("GIT_REV"), comp_ver = env!("GIT_REV"),
) )
}; };
p.join(isa.name()).join(compiler_dir).join(format!( p.join(isa.name()).join(compiler_dir).join(format!(
"mod-{mod_hash}{mod_dbg}", "mod-{mod_hash}{mod_dbg}",
mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename mod_hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD), // standard encoding uses '/' which can't be used for filename
mod_dbg = if generate_debug_info { ".d" } else { "" }, mod_dbg = if generate_debug_info { ".d" } else { "" },
)) ))
})
}) })
} else { } else {
None None
@@ -227,6 +233,27 @@ impl ModuleCacheData {
} }
} }
impl Sha256Hasher {
pub fn digest<'data>(
module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
) -> [u8; 32] {
let mut hasher = Self(Sha256::new());
module.hash_for_cache(function_body_inputs, &mut hasher);
hasher.0.result().into()
}
}
impl Hasher for Sha256Hasher {
fn finish(&self) -> u64 {
panic!("Sha256Hasher doesn't support finish!");
}
fn write(&mut self, bytes: &[u8]) {
self.0.input(bytes);
}
}
//-//////////////////////////////////////////////////////////////////// //-////////////////////////////////////////////////////////////////////
// Serialization and deserialization of type containing SecondaryMap // // Serialization and deserialization of type containing SecondaryMap //
//-//////////////////////////////////////////////////////////////////// //-////////////////////////////////////////////////////////////////////

View File

@@ -124,7 +124,13 @@ impl crate::compilation::Compiler for Cranelift {
isa: &dyn isa::TargetIsa, isa: &dyn isa::TargetIsa,
generate_debug_info: bool, generate_debug_info: bool,
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> { ) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
let cache_entry = ModuleCacheEntry::new(module, isa, "cranelift", generate_debug_info); let cache_entry = ModuleCacheEntry::new(
module,
&function_body_inputs,
isa,
"cranelift",
generate_debug_info,
);
let data = match cache_entry.get_data() { let data = match cache_entry.get_data() {
Some(data) => data, Some(data) => data,

View File

@@ -1,6 +1,8 @@
//! Data structures for representing decoded wasm modules. //! Data structures for representing decoded wasm modules.
use crate::module_environ::FunctionBodyData;
use crate::tunables::Tunables; use crate::tunables::Tunables;
use core::hash::{Hash, Hasher};
use cranelift_codegen::ir; use cranelift_codegen::ir;
use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_wasm::{ use cranelift_wasm::{
@@ -13,7 +15,7 @@ use std::string::String;
use std::vec::Vec; use std::vec::Vec;
/// A WebAssembly table initializer. /// A WebAssembly table initializer.
#[derive(Clone, Debug)] #[derive(Clone, Debug, Hash)]
pub struct TableElements { pub struct TableElements {
/// The index of a table to initialize. /// The index of a table to initialize.
pub table_index: TableIndex, pub table_index: TableIndex,
@@ -26,7 +28,7 @@ pub struct TableElements {
} }
/// An entity to export. /// An entity to export.
#[derive(Clone, Debug)] #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Export { pub enum Export {
/// Function export. /// Function export.
Function(FuncIndex), Function(FuncIndex),
@@ -39,7 +41,7 @@ pub enum Export {
} }
/// Implementation styles for WebAssembly linear memory. /// Implementation styles for WebAssembly linear memory.
#[derive(Debug, Clone)] #[derive(Debug, Clone, Hash)]
pub enum MemoryStyle { pub enum MemoryStyle {
/// The actual memory can be resized and moved. /// The actual memory can be resized and moved.
Dynamic, Dynamic,
@@ -77,7 +79,7 @@ impl MemoryStyle {
/// A WebAssembly linear memory description along with our chosen style for /// A WebAssembly linear memory description along with our chosen style for
/// implementing it. /// implementing it.
#[derive(Debug, Clone)] #[derive(Debug, Clone, Hash)]
pub struct MemoryPlan { pub struct MemoryPlan {
/// The WebAssembly linear memory description. /// The WebAssembly linear memory description.
pub memory: Memory, pub memory: Memory,
@@ -100,7 +102,7 @@ impl MemoryPlan {
} }
/// Implementation styles for WebAssembly tables. /// Implementation styles for WebAssembly tables.
#[derive(Debug, Clone)] #[derive(Debug, Clone, Hash)]
pub enum TableStyle { pub enum TableStyle {
/// Signatures are stored in the table and checked in the caller. /// Signatures are stored in the table and checked in the caller.
CallerChecksSignature, CallerChecksSignature,
@@ -115,7 +117,7 @@ impl TableStyle {
/// A WebAssembly table description along with our chosen style for /// A WebAssembly table description along with our chosen style for
/// implementing it. /// implementing it.
#[derive(Debug, Clone)] #[derive(Debug, Clone, Hash)]
pub struct TablePlan { pub struct TablePlan {
/// The WebAssembly table description. /// The WebAssembly table description.
pub table: cranelift_wasm::Table, pub table: cranelift_wasm::Table,
@@ -133,6 +135,7 @@ impl TablePlan {
/// A translated WebAssembly module, excluding the function bodies and /// A translated WebAssembly module, excluding the function bodies and
/// memory initializers. /// memory initializers.
// WARNING: when modifying, make sure that `hash_for_cache` is still valid!
#[derive(Debug)] #[derive(Debug)]
pub struct Module { pub struct Module {
/// Unprocessed signatures exactly as provided by `declare_signature()`. /// Unprocessed signatures exactly as provided by `declare_signature()`.
@@ -170,10 +173,6 @@ pub struct Module {
/// WebAssembly table initializers. /// WebAssembly table initializers.
pub table_elements: Vec<TableElements>, pub table_elements: Vec<TableElements>,
/// Hash of the source wasm code if this module is not synthesized.
/// TODO: this is temporary workaround. Will be replaced with derive macro.
pub hash: Option<[u8; 32]>,
} }
impl Module { impl Module {
@@ -192,7 +191,6 @@ impl Module {
exports: IndexMap::new(), exports: IndexMap::new(),
start_func: None, start_func: None,
table_elements: Vec::new(), table_elements: Vec::new(),
hash: None,
} }
} }
@@ -283,4 +281,29 @@ impl Module {
pub fn is_imported_global(&self, index: GlobalIndex) -> bool { pub fn is_imported_global(&self, index: GlobalIndex) -> bool {
index.index() < self.imported_globals.len() index.index() < self.imported_globals.len()
} }
/// Computes hash of the module for the purpose of caching.
pub fn hash_for_cache<'data, H>(
&self,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
state: &mut H,
) where
H: Hasher,
{
// There's no need to cache names (strings), start function
// and data initializers (for both memory and tables)
self.signatures.hash(state);
self.functions.hash(state);
self.table_plans.hash(state);
self.memory_plans.hash(state);
self.globals.hash(state);
// IndexMap (self.export) iterates over values in order of item inserts
// Let's actually sort the values.
let mut exports = self.exports.values().collect::<Vec<_>>();
exports.sort();
for val in exports {
val.hash(state);
}
function_body_inputs.hash(state);
}
} }

View File

@@ -10,12 +10,12 @@ use cranelift_wasm::{
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex, self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
SignatureIndex, Table, TableIndex, WasmResult, SignatureIndex, Table, TableIndex, WasmResult,
}; };
use sha2::{Digest, Sha256};
use std::boxed::Box; use std::boxed::Box;
use std::string::String; use std::string::String;
use std::vec::Vec; use std::vec::Vec;
/// Contains function data: byte code and its offset in the module. /// Contains function data: byte code and its offset in the module.
#[derive(Hash)]
pub struct FunctionBodyData<'a> { pub struct FunctionBodyData<'a> {
/// Body byte code. /// Body byte code.
pub data: &'a [u8], pub data: &'a [u8],
@@ -80,11 +80,6 @@ impl<'data> ModuleEnvironment<'data> {
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> { pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
translate_module(data, &mut self)?; translate_module(data, &mut self)?;
// TODO: this is temporary workaround and will be replaced with derive macro.
let mut hasher = Sha256::new();
hasher.input(data);
self.result.module.hash = Some(hasher.result().into());
Ok(self.result) Ok(self.result)
} }
} }