Simple module compilation cache (#203)

* Simple module compilation cache

* Fix base64 encoding bug

* Use warn! everywhere in cache system

* Remove unused import

* Temporary workaround for long path on Windows

* Remove unused import for non-windows builds

* Add command line argument to enable cache system + apply minor review feedback
This commit is contained in:
Artur Jamro
2019-07-25 16:16:10 -07:00
committed by Dan Gohman
parent 17e4528648
commit 165dc4944d
20 changed files with 502 additions and 110 deletions

View File

@@ -4,10 +4,11 @@
use cranelift_codegen::ir;
use cranelift_entity::PrimaryMap;
use cranelift_wasm::DefinedFuncIndex;
use serde::{Deserialize, Serialize};
use std::vec::Vec;
/// Single source location to generated address mapping.
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct InstructionAddressMap {
/// Original source location.
pub srcloc: ir::SourceLoc,
@@ -20,7 +21,7 @@ pub struct InstructionAddressMap {
}
/// Function and its instructions addresses mappings.
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FunctionAddressMap {
/// Instructions maps.
/// The array is sorted by the InstructionAddressMap::code_offset field.

View File

@@ -0,0 +1,342 @@
use crate::address_map::ModuleAddressMap;
use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations};
use crate::module::Module;
use cranelift_codegen::ir;
use cranelift_codegen::isa;
use directories::ProjectDirs;
use lazy_static::lazy_static;
use log::warn;
use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
use serde::ser::{self, Serialize, SerializeSeq, SerializeStruct, Serializer};
#[cfg(windows)]
use std::ffi::OsString;
use std::fmt;
use std::fs;
use std::io;
#[cfg(windows)]
use std::path::Path;
use std::path::PathBuf;
/// Configuration of the module compilation cache.
pub mod conf {
    use spin::Once;

    // Deliberately private: only the functions below may read or set the flag.
    static CACHE_ENABLED: Once<bool> = Once::new();

    /// Tells whether the cache is enabled.
    ///
    /// When the flag has not been set yet, this call settles it to `false`:
    /// not every user of the crate knows about the cache system (e.g. the
    /// tests), so it stays off unless explicitly requested.
    pub fn cache_enabled() -> bool {
        let enabled: &bool = CACHE_ENABLED.call_once(|| false);
        *enabled
    }

    /// Sets up the cache system.
    ///
    /// Must be called exactly once, and before the cache system is used.
    ///
    /// # Panics
    ///
    /// Panics if the flag already holds a value when `init` is entered, or if
    /// a racing initialization stored a value different from `enabled`.
    pub fn init(enabled: bool) {
        // The flag must not have been settled before this call.
        assert!(CACHE_ENABLED.r#try().is_none());
        let val = *CACHE_ENABLED.call_once(|| enabled);
        // Several threads may get past the check above at the same time, so
        // verify that the value which was actually stored is the requested one.
        assert!(val == enabled);
    }
}
// Lazily evaluated location of the cache directory.
//
// The value is `Some(path)` when `ProjectDirs` yields a project directory for
// ("", "CraneStation", "wasmtime"), and `None` (with a warning logged) otherwise.
// The directory is created on first access.
//
// NOTE(review): when `fs::create_dir_all` fails only a warning is logged and the
// path is still returned, so later reads/writes against it are expected to fail
// as well -- confirm this is the intended behavior.
lazy_static! {
static ref CACHE_DIR: Option<PathBuf> =
match ProjectDirs::from("", "CraneStation", "wasmtime") {
Some(proj_dirs) => {
let cache_dir = proj_dirs.cache_dir();
// Temporary workaround for: https://github.com/rust-lang/rust/issues/32689
// On Windows the path is given the `\\?\` prefix, unless it already starts
// with it (workaround for long paths).
#[cfg(windows)]
let mut long_path = OsString::from("\\\\?\\");
#[cfg(windows)]
let cache_dir = {
if cache_dir.starts_with("\\\\?\\") {
cache_dir
}
else {
// `long_path` is declared outside of this block so that the `Path`
// borrowed from it stays valid for the rest of the arm.
long_path.push(cache_dir.as_os_str());
Path::new(&long_path)
}
};
// Creation failure is reported but does not prevent the path from being returned.
match fs::create_dir_all(cache_dir) {
Ok(()) => (),
Err(err) => warn!("Unable to create cache directory, failed with: {}", err),
};
Some(cache_dir.to_path_buf())
}
None => {
warn!("Unable to find cache directory");
None
}
};
}
/// Handle to the cache file of a single module.
pub struct ModuleCacheEntry {
// Location of the module's cache file. `None` when the cache is disabled,
// no cache directory is available, or the module carries no hash.
mod_cache_path: Option<PathBuf>,
}
/// Payload stored in (and restored from) a module's cache file.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct ModuleCacheData {
// Result of compiling the module's functions.
compilation: Compilation,
// Relocations recorded for the compiled functions.
relocations: Relocations,
// Address maps of the module.
address_transforms: ModuleAddressMap,
}
// Tuple form of `ModuleCacheData`: (compilation, relocations, address_transforms).
type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap);
impl ModuleCacheEntry {
    /// Creates the cache handle for `module`.
    ///
    /// The handle points at `<cache dir>/mod-<hash>`, where `<hash>` is the
    /// URL-safe, unpadded base64 encoding of `module.hash`. No path is set (and
    /// every later operation is a no-op) when the cache is disabled, when no
    /// cache directory is available, or when the module has no hash.
    ///
    /// `_isa` and `_generate_debug_info` are currently unused.
    pub fn new(module: &Module, _isa: &dyn isa::TargetIsa, _generate_debug_info: bool) -> Self {
        // TODO: cache directory hierarchy with isa name, compiler name & git revision, and files with flag if debug symbols are available
        let mod_cache_path = if conf::cache_enabled() {
            // The cache directory is only touched when the cache is enabled;
            // borrowing it avoids cloning the path when there is no hash.
            match (&*CACHE_DIR, module.hash) {
                (Some(dir), Some(hash)) => Some(dir.join(format!(
                    "mod-{}",
                    // standard encoding uses '/' which can't be used for filename
                    base64::encode_config(&hash, base64::URL_SAFE_NO_PAD)
                ))),
                _ => None,
            }
        } else {
            None
        };

        ModuleCacheEntry { mod_cache_path }
    }

    /// Loads the cached data of the module, if any.
    ///
    /// Returns `None` when the entry has no path, when the cache file cannot
    /// be read, or when its content cannot be deserialized. A missing file is
    /// the normal "not cached yet" case and is silent; every other failure is
    /// reported with a warning.
    pub fn get_data(&self) -> Option<ModuleCacheData> {
        let path = self.mod_cache_path.as_ref()?;

        let cache_bytes = match fs::read(path) {
            Ok(bytes) => bytes,
            Err(err) => {
                // Mirror `update_data`: only unexpected I/O errors are worth a warning.
                if err.kind() != io::ErrorKind::NotFound {
                    warn!(
                        "Failed to read cached code from disk, path: {}, message: {}",
                        path.display(),
                        err
                    );
                }
                return None;
            }
        };

        match bincode::deserialize(&cache_bytes[..]) {
            Ok(data) => Some(data),
            Err(err) => {
                warn!("Failed to deserialize cached code: {}", err);
                None
            }
        }
    }

    /// Stores `data` in the module's cache file.
    ///
    /// This is best effort: failures are logged with a warning and never
    /// propagated. When writing fails, the possibly partially written file is
    /// removed so that it is not picked up as a valid cache later.
    pub fn update_data(&self, data: &ModuleCacheData) {
        let path = match &self.mod_cache_path {
            Some(path) => path,
            None => return,
        };

        let cache_buf = match bincode::serialize(data) {
            Ok(buf) => buf,
            Err(err) => {
                warn!("Failed to serialize cached code: {}", err);
                return;
            }
        };

        if let Err(err) = fs::write(path, &cache_buf) {
            warn!(
                "Failed to write cached code to disk, path: {}, message: {}",
                path.display(),
                err
            );
            if let Err(err) = fs::remove_file(path) {
                // Nothing to clean up if the file was never created.
                if err.kind() != io::ErrorKind::NotFound {
                    warn!(
                        "Failed to cleanup invalid cache, path: {}, message: {}",
                        path.display(),
                        err
                    );
                }
            }
        }
    }
}
impl ModuleCacheData {
    /// Builds the cache payload from a
    /// `(compilation, relocations, address_transforms)` tuple.
    pub fn from_tuple(data: ModuleCacheDataTupleType) -> Self {
        let (compilation, relocations, address_transforms) = data;
        Self {
            compilation,
            relocations,
            address_transforms,
        }
    }

    /// Consumes the payload and returns its parts as a
    /// `(compilation, relocations, address_transforms)` tuple.
    pub fn to_tuple(self) -> ModuleCacheDataTupleType {
        let Self {
            compilation,
            relocations,
            address_transforms,
        } = self;
        (compilation, relocations, address_transforms)
    }
}
//-////////////////////////////////////////////////////////////////////
// Serialization and deserialization of type containing SecondaryMap //
//-////////////////////////////////////////////////////////////////////
// Adapter used to (de)serialize `ir::JumpTableOffsets` by hand.
// Serialization only accepts the `Ref` variant and deserialization only
// produces the `Data` variant; the other variant is reported as an error.
enum JtOffsetsWrapper<'a> {
Ref(&'a ir::JumpTableOffsets), // borrowed map, input of serialization
Data(ir::JumpTableOffsets), // owned map, output of deserialization
}
/// Serializes `CodeAndJTOffsets` as a two-field struct: `body` as is, and
/// `jt_offsets` through the `JtOffsetsWrapper` adapter.
impl Serialize for CodeAndJTOffsets {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let wrapped_offsets = JtOffsetsWrapper::Ref(&self.jt_offsets);

        let mut state = serializer.serialize_struct("CodeAndJTOffsets", 2)?;
        state.serialize_field("body", &self.body)?;
        state.serialize_field("jt_offsets", &wrapped_offsets)?;
        state.end()
    }
}
impl<'de> Deserialize<'de> for CodeAndJTOffsets {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
#[derive(serde::Deserialize)]
#[serde(field_identifier, rename_all = "lowercase")]
enum Field {
Body,
JtOffsets,
};
struct CodeAndJTOffsetsVisitor;
impl<'de> Visitor<'de> for CodeAndJTOffsetsVisitor {
type Value = CodeAndJTOffsets;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct CodeAndJTOffsets")
}
fn visit_seq<V>(self, mut seq: V) -> Result<Self::Value, V::Error>
where
V: SeqAccess<'de>,
{
let body = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(0, &self))?;
let jt_offsets = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(1, &self))?;
match jt_offsets {
JtOffsetsWrapper::Data(jt_offsets) => Ok(CodeAndJTOffsets { body, jt_offsets }),
JtOffsetsWrapper::Ref(_) => Err(de::Error::custom(
"Received invalid variant of JtOffsetsWrapper",
)),
}
}
fn visit_map<V>(self, mut map: V) -> Result<Self::Value, V::Error>
where
V: MapAccess<'de>,
{
let mut body = None;
let mut jt_offsets = None;
while let Some(key) = map.next_key()? {
match key {
Field::Body => {
if body.is_some() {
return Err(de::Error::duplicate_field("body"));
}
body = Some(map.next_value()?);
}
Field::JtOffsets => {
if jt_offsets.is_some() {
return Err(de::Error::duplicate_field("jt_offsets"));
}
jt_offsets = Some(map.next_value()?);
}
}
}
let body = body.ok_or_else(|| de::Error::missing_field("body"))?;
let jt_offsets =
jt_offsets.ok_or_else(|| de::Error::missing_field("jt_offsets"))?;
match jt_offsets {
JtOffsetsWrapper::Data(jt_offsets) => Ok(CodeAndJTOffsets { body, jt_offsets }),
JtOffsetsWrapper::Ref(_) => Err(de::Error::custom(
"Received invalid variant of JtOffsetsWrapper",
)),
}
}
}
const FIELDS: &'static [&'static str] = &["body", "jt_offsets"];
deserializer.deserialize_struct("CodeAndJTOffsets", FIELDS, CodeAndJTOffsetsVisitor)
}
}
impl Serialize for JtOffsetsWrapper<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
JtOffsetsWrapper::Ref(data) => {
// TODO: bincode encodes option as "byte for Some/None" and then optionally the content
// TODO: we can actually optimize it by encoding manually bitmask, then elements
let default_val = data.get_default();
let mut seq = serializer.serialize_seq(Some(1 + data.len()))?;
seq.serialize_element(&Some(default_val))?;
for e in data.values() {
let some_e = Some(e);
seq.serialize_element(if e == default_val { &None } else { &some_e })?;
}
seq.end()
}
JtOffsetsWrapper::Data(_) => Err(ser::Error::custom(
"Received invalid variant of JtOffsetsWrapper",
)),
}
}
}
impl<'de> Deserialize<'de> for JtOffsetsWrapper<'_> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
struct JtOffsetsWrapperVisitor;
impl<'de> Visitor<'de> for JtOffsetsWrapperVisitor {
type Value = JtOffsetsWrapper<'static>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct JtOffsetsWrapper")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
match seq.next_element()? {
Some(Some(default_val)) => {
let mut m = cranelift_entity::SecondaryMap::with_default(default_val);
let mut idx = 0;
while let Some(val) = seq.next_element()? {
let val: Option<_> = val; // compiler can't infer the type, and this line is needed
match ir::JumpTable::with_number(idx) {
Some(jt_idx) => m[jt_idx] = val.unwrap_or(default_val),
None => {
return Err(serde::de::Error::custom(
"Invalid JumpTable reference",
))
}
};
idx += 1;
}
Ok(JtOffsetsWrapper::Data(m))
}
_ => Err(serde::de::Error::custom("Default value required")),
}
}
}
deserializer.deserialize_seq(JtOffsetsWrapperVisitor {})
}
}

View File

@@ -7,6 +7,7 @@ use crate::module_environ::FunctionBodyData;
use cranelift_codegen::{binemit, ir, isa, CodegenError};
use cranelift_entity::PrimaryMap;
use cranelift_wasm::{DefinedFuncIndex, FuncIndex, WasmError};
use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::vec::Vec;
@@ -23,7 +24,7 @@ pub struct CodeAndJTOffsets {
type Functions = PrimaryMap<DefinedFuncIndex, CodeAndJTOffsets>;
/// The result of compiling a WebAssembly module's functions.
#[derive(Debug)]
#[derive(Deserialize, Serialize, Debug)]
pub struct Compilation {
/// Compiled machine code for the function bodies.
functions: Functions,
@@ -94,7 +95,7 @@ impl<'a> Iterator for Iter<'a> {
}
/// A record of a relocation to perform.
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Relocation {
/// The relocation code.
pub reloc: binemit::Reloc,
@@ -107,7 +108,7 @@ pub struct Relocation {
}
/// Destination function. Can be either user function or some special one, like `memory.grow`.
#[derive(Debug, Copy, Clone)]
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub enum RelocationTarget {
/// The user function index.
UserFunc(FuncIndex),

View File

@@ -1,6 +1,7 @@
//! Support for compiling with Cranelift.
use crate::address_map::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap};
use crate::cache::{ModuleCacheData, ModuleCacheEntry};
use crate::compilation::{
CodeAndJTOffsets, Compilation, CompileError, Relocation, RelocationTarget, Relocations,
};
@@ -123,73 +124,90 @@ impl crate::compilation::Compiler for Cranelift {
isa: &dyn isa::TargetIsa,
generate_debug_info: bool,
) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> {
let mut functions = PrimaryMap::with_capacity(function_body_inputs.len());
let mut relocations = PrimaryMap::with_capacity(function_body_inputs.len());
let mut address_transforms = PrimaryMap::with_capacity(function_body_inputs.len());
let cache_entry = ModuleCacheEntry::new(module, isa, generate_debug_info);
function_body_inputs
.into_iter()
.collect::<Vec<(DefinedFuncIndex, &FunctionBodyData<'data>)>>()
.par_iter()
.map(|(i, input)| {
let func_index = module.func_index(*i);
let mut context = Context::new();
context.func.name = get_func_name(func_index);
context.func.signature = module.signatures[module.functions[func_index]].clone();
let data = match cache_entry.get_data() {
Some(data) => data,
None => {
let mut functions = PrimaryMap::with_capacity(function_body_inputs.len());
let mut relocations = PrimaryMap::with_capacity(function_body_inputs.len());
let mut address_transforms = PrimaryMap::with_capacity(function_body_inputs.len());
let mut trans = FuncTranslator::new();
trans
.translate(
input.data,
input.module_offset,
&mut context.func,
&mut FuncEnvironment::new(isa.frontend_config(), module),
)
.map_err(CompileError::Wasm)?;
function_body_inputs
.into_iter()
.collect::<Vec<(DefinedFuncIndex, &FunctionBodyData<'data>)>>()
.par_iter()
.map(|(i, input)| {
let func_index = module.func_index(*i);
let mut context = Context::new();
context.func.name = get_func_name(func_index);
context.func.signature =
module.signatures[module.functions[func_index]].clone();
let mut code_buf: Vec<u8> = Vec::new();
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = binemit::NullTrapSink {};
context
.compile_and_emit(isa, &mut code_buf, &mut reloc_sink, &mut trap_sink)
.map_err(CompileError::Codegen)?;
let mut trans = FuncTranslator::new();
trans
.translate(
input.data,
input.module_offset,
&mut context.func,
&mut FuncEnvironment::new(isa.frontend_config(), module),
)
.map_err(CompileError::Wasm)?;
let jt_offsets = context.func.jt_offsets.clone();
let mut code_buf: Vec<u8> = Vec::new();
let mut reloc_sink = RelocSink::new(func_index);
let mut trap_sink = binemit::NullTrapSink {};
context
.compile_and_emit(isa, &mut code_buf, &mut reloc_sink, &mut trap_sink)
.map_err(CompileError::Codegen)?;
let address_transform = if generate_debug_info {
let body_len = code_buf.len();
let at = get_address_transform(&context, isa);
let jt_offsets = context.func.jt_offsets.clone();
Some(FunctionAddressMap {
instructions: at,
body_offset: 0,
body_len,
let address_transform = if generate_debug_info {
let body_len = code_buf.len();
let at = get_address_transform(&context, isa);
Some(FunctionAddressMap {
instructions: at,
body_offset: 0,
body_len,
})
} else {
None
};
Ok((
code_buf,
jt_offsets,
reloc_sink.func_relocs,
address_transform,
))
})
} else {
None
};
.collect::<Result<Vec<_>, CompileError>>()?
.into_iter()
.for_each(|(function, func_jt_offsets, relocs, address_transform)| {
functions.push(CodeAndJTOffsets {
body: function,
jt_offsets: func_jt_offsets,
});
relocations.push(relocs);
if let Some(address_transform) = address_transform {
address_transforms.push(address_transform);
}
});
Ok((
code_buf,
jt_offsets,
reloc_sink.func_relocs,
address_transform,
))
})
.collect::<Result<Vec<_>, CompileError>>()?
.into_iter()
.for_each(|(function, func_jt_offsets, relocs, address_transform)| {
functions.push(CodeAndJTOffsets {
body: function,
jt_offsets: func_jt_offsets,
});
relocations.push(relocs);
if let Some(address_transform) = address_transform {
address_transforms.push(address_transform);
}
});
// TODO: Reorganize where we create the Vec for the resolved imports.
// TODO: Reorganize where we create the Vec for the resolved imports.
Ok((Compilation::new(functions), relocations, address_transforms))
let data = ModuleCacheData::from_tuple((
Compilation::new(functions),
relocations,
address_transforms,
));
cache_entry.update_data(&data);
data
}
};
Ok(data.to_tuple())
}
}

View File

@@ -45,11 +45,14 @@ mod module_environ;
mod tunables;
mod vmoffsets;
mod cache;
pub mod cranelift;
#[cfg(feature = "lightbeam")]
pub mod lightbeam;
pub use crate::address_map::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap};
pub use crate::cache::conf as cache_conf;
pub use crate::compilation::{
Compilation, CompileError, Compiler, Relocation, RelocationTarget, Relocations,
};

View File

@@ -170,6 +170,10 @@ pub struct Module {
/// WebAssembly table initializers.
pub table_elements: Vec<TableElements>,
/// Hash of the source wasm code if this module is not synthesized.
/// TODO: this is a temporary workaround; it will be replaced with a derive macro.
pub hash: Option<[u8; 32]>,
}
impl Module {
@@ -188,6 +192,7 @@ impl Module {
exports: IndexMap::new(),
start_func: None,
table_elements: Vec::new(),
hash: None,
}
}

View File

@@ -10,6 +10,7 @@ use cranelift_wasm::{
self, translate_module, DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex,
SignatureIndex, Table, TableIndex, WasmResult,
};
use sha2::{Digest, Sha256};
use std::boxed::Box;
use std::string::String;
use std::vec::Vec;
@@ -79,6 +80,11 @@ impl<'data> ModuleEnvironment<'data> {
pub fn translate(mut self, data: &'data [u8]) -> WasmResult<ModuleTranslation<'data>> {
translate_module(data, &mut self)?;
// TODO: this is a temporary workaround and will be replaced with a derive macro.
let mut hasher = Sha256::new();
hasher.input(data);
self.result.module.hash = Some(hasher.result().into());
Ok(self.result)
}
}