Partial hashing of module for faster caching (#221)

* Simple module compilation cache

* Fix base64 encoding bug

* Use warn! everywhere in cache system

* Remove unused import

* Temporary workaround for long path on Windows

* Remove unused import for non-windows builds

* Add command line argument to enable cache system + apply minor review feedback

* Initial implementation of partial module hashing

* Proper module hashing for the cache

* Use newer version of cranelift
This commit is contained in:
Artur Jamro
2019-08-06 17:19:26 -07:00
committed by Dan Gohman
parent 17d676ecbc
commit b10f8cf322
4 changed files with 94 additions and 43 deletions

View File

@@ -1,13 +1,17 @@
use crate::address_map::ModuleAddressMap;
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
use crate::module::Module;
use cranelift_codegen::ir;
use cranelift_codegen::isa;
use crate::module_environ::FunctionBodyData;
use core::hash::Hasher;
use cranelift_codegen::{ir, isa};
use cranelift_entity::PrimaryMap;
use cranelift_wasm::DefinedFuncIndex;
use directories::ProjectDirs;
use lazy_static::lazy_static;
use log::{debug, warn};
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
use sha2::{Digest, Sha256};
use std::ffi::OsString;
use std::fmt;
use std::fs;
@@ -102,16 +106,19 @@ pub struct ModuleCacheData {
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
struct Sha256Hasher(Sha256);
impl ModuleCacheEntry {
pub fn new(
pub fn new<'data>(
module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
isa: &dyn isa::TargetIsa,
compiler_name: &str,
generate_debug_info: bool,
) -> Self {
let mod_cache_path = if conf::cache_enabled() {
CACHE_DIR.clone().and_then(|p| {
module.hash.map(|hash| {
CACHE_DIR.clone().map(|p| {
let hash = Sha256Hasher::digest(module, function_body_inputs);
let compiler_dir = if cfg!(debug_assertions) {
format!(
"{comp_name}-{comp_ver}-{comp_mtime}",
@@ -132,7 +139,6 @@ impl ModuleCacheEntry {
mod_dbg = if generate_debug_info { ".d" } else { "" },
))
})
})
} else {
None
};
@@ -227,6 +233,27 @@ impl ModuleCacheData {
}
}
impl Sha256Hasher {
pub fn digest<'data>(
module: &Module,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
) -> [u8; 32] {
let mut hasher = Self(Sha256::new());
module.hash_for_cache(function_body_inputs, &mut hasher);
hasher.0.result().into()
}
}
impl Hasher for Sha256Hasher {
fn finish(&self) -> u64 {
panic!("Sha256Hasher doesn't support finish!");
}
fn write(&mut self, bytes: &[u8]) {
self.0.input(bytes);
}
}
//-////////////////////////////////////////////////////////////////////
// Serialization and deserialization of type containing SecondaryMap //
//-////////////////////////////////////////////////////////////////////

View File

@@ -124,7 +124,13 @@ impl crate::compilation::Compiler for Cranelift {
isa: &dyn isa::TargetIsa,
generate_debug_info: bool,
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
let cache_entry = ModuleCacheEntry::new(module, isa, "cranelift", generate_debug_info);
let cache_entry = ModuleCacheEntry::new(
module,
&function_body_inputs,
isa,
"cranelift",
generate_debug_info,
);
let data = match cache_entry.get_data() {
Some(data) => data,

View File

@@ -1,6 +1,8 @@
//! Data structures for representing decoded wasm modules.
use crate::module_environ::FunctionBodyData;
use crate::tunables::Tunables;
use core::hash::{Hash, Hasher};
use cranelift_codegen::ir;
use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_wasm::{
@@ -13,7 +15,7 @@ use std::string::String;
use std::vec::Vec;
/// A WebAssembly table initializer.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Hash)]
pub struct TableElements {
/// The index of a table to initialize.
pub table_index: TableIndex,
@@ -26,7 +28,7 @@ pub struct TableElements {
}
/// An entity to export.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Export {
/// Function export.
Function(FuncIndex),
@@ -39,7 +41,7 @@ pub enum Export {
}
/// Implementation styles for WebAssembly linear memory.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub enum MemoryStyle {
/// The actual memory can be resized and moved.
Dynamic,
@@ -77,7 +79,7 @@ impl MemoryStyle {
/// A WebAssembly linear memory description along with our chosen style for
/// implementing it.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct MemoryPlan {
/// The WebAssembly linear memory description.
pub memory: Memory,
@@ -100,7 +102,7 @@ impl MemoryPlan {
}
/// Implementation styles for WebAssembly tables.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub enum TableStyle {
/// Signatures are stored in the table and checked in the caller.
CallerChecksSignature,
@@ -115,7 +117,7 @@ impl TableStyle {
/// A WebAssembly table description along with our chosen style for
/// implementing it.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct TablePlan {
/// The WebAssembly table description.
pub table: cranelift_wasm::Table,
@@ -133,6 +135,7 @@ impl TablePlan {
/// A translated WebAssembly module, excluding the function bodies and
/// memory initializers.
// WARNING: when modifying, make sure that `hash_for_cache` is still valid!
#[derive(Debug)]
pub struct Module {
/// Unprocessed signatures exactly as provided by `declare_signature()`.
@@ -170,10 +173,6 @@ pub struct Module {
/// WebAssembly table initializers.
pub table_elements: Vec<TableElements>,
/// Hash of the source wasm code if this module is not synthesized.
/// TODO: this is temporary workaround. Will be replaced with derive macro.
pub hash: Option<[u8; 32]>,
}
impl Module {
@@ -192,7 +191,6 @@ impl Module {
exports: IndexMap::new(),
start_func: None,
table_elements: Vec::new(),
hash: None,
}
}
@@ -283,4 +281,29 @@ impl Module {
pub fn is_imported_global(&self, index: GlobalIndex) -> bool {
    // An index counts as imported when it is below the number of
    // imported globals recorded for this module.
    let imported_count = self.imported_globals.len();
    index.index() < imported_count
}
/// Computes hash of the module for the purpose of caching.
pub fn hash_for_cache<'data, H>(
&self,
function_body_inputs: &PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
state: &mut H,
) where
H: Hasher,
{
// There's no need to cache names (strings), start function
// and data initializers (for both memory and tables)
self.signatures.hash(state);
self.functions.hash(state);
self.table_plans.hash(state);
self.memory_plans.hash(state);
self.globals.hash(state);
// IndexMap (self.export) iterates over values in order of item inserts
// Let's actually sort the values.
let mut exports = self.exports.values().collect::<Vec<_>>();
exports.sort();
for val in exports {
val.hash(state);
}
function_body_inputs.hash(state);
}
}

View File

@@ -10,12 +10,12 @@ use cranelift_wasm::{
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
SignatureIndex, Table, TableIndex, WasmResult,
};
use sha2::{Digest, Sha256};
use std::boxed::Box;
use std::string::String;
use std::vec::Vec;
/// Contains function data: byte code and its offset in the module.
#[derive(Hash)]
pub struct FunctionBodyData<'a> {
/// Body byte code.
pub data: &'a [u8],
@@ -80,11 +80,6 @@ impl<'data> ModuleEnvironment<'data> {
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
translate_module(data, &mut self)?;
// TODO: this is temporary workaround and will be replaced with derive macro.
let mut hasher = Sha256::new();
hasher.input(data);
self.result.module.hash = Some(hasher.result().into());
Ok(self.result)
}
}