Encode modules with variable-length integers (#2322)

Update `Module::{serialize,deserialize}` to use variable-length integers
with `bincode` to make the output artifacts smaller. Locally this
reduces the size of #2318 from 160 to 110 MB, a 30% decrease in size!
Deserialization is slightly slower, but the difference seems to be within
the range of noise in my local testing.
This commit is contained in:
Alex Crichton
2020-10-26 09:52:29 -05:00
committed by GitHub
parent c15d9bd61b
commit 27233857c5

View File

@@ -2,6 +2,7 @@ use crate::frame_info::GlobalFrameInfoRegistration;
use crate::runtime::{Config, Engine}; use crate::runtime::{Config, Engine};
use crate::types::{EntityType, ExportType, ExternType, ImportType}; use crate::types::{EntityType, ExportType, ExternType, ImportType};
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
use bincode::Options;
use std::path::Path; use std::path::Path;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use wasmparser::Validator; use wasmparser::Validator;
@@ -293,9 +294,7 @@ impl Module {
self.compiled.to_compilation_artifacts(), self.compiled.to_compilation_artifacts(),
); );
let mut buffer = Vec::new(); let buffer = bincode_options().serialize(&artifacts)?;
bincode::serialize_into(&mut buffer, &artifacts)?;
Ok(buffer) Ok(buffer)
} }
@@ -311,9 +310,9 @@ impl Module {
pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result<Module> { pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result<Module> {
let expected_fingerprint = compiler_fingerprint(engine.config()); let expected_fingerprint = compiler_fingerprint(engine.config());
let (fingerprint, artifacts) = let (fingerprint, artifacts) = bincode_options()
bincode::deserialize_from::<_, (u64, CompilationArtifacts)>(serialized) .deserialize::<(u64, CompilationArtifacts)>(serialized)
.context("Deserialize compilation artifacts")?; .context("Deserialize compilation artifacts")?;
if fingerprint != expected_fingerprint { if fingerprint != expected_fingerprint {
bail!("Incompatible compilation artifact"); bail!("Incompatible compilation artifact");
} }
@@ -557,6 +556,17 @@ impl Module {
} }
} }
/// Returns the `bincode` configuration used when serializing and
/// deserializing compiled module artifacts.
fn bincode_options() -> impl Options {
    // Start from bincode's default option set...
    let defaults = bincode::DefaultOptions::new();

    // ...and encode integers with a variable-length scheme rather than a
    // fixed-width one. For the module from #2318 this alone shrinks the
    // serialized output from ~160MB to ~110MB, presumably because it holds
    // many 8-byte integers whose values are usually small. Local testing
    // shows deserialization gets slower by only a few percent, which is a
    // worthwhile trade for such a large size saving.
    defaults.with_varint_encoding()
}
fn compiler_fingerprint(config: &Config) -> u64 { fn compiler_fingerprint(config: &Config) -> u64 {
use std::hash::Hasher; use std::hash::Hasher;
let mut hasher = std::collections::hash_map::DefaultHasher::new(); let mut hasher = std::collections::hash_map::DefaultHasher::new();