From 27233857c534a3ae3e9f8fa3a7a3a0fb563e1f4d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 26 Oct 2020 09:52:29 -0500 Subject: [PATCH] Encode modules with variable-length integers (#2322) Update `Module::{serialize,deserialize}` to use variable-length integers with `bincode` to make the output artifacts smaller. Locally this reduces the size of #2318 from 160 to 110 MB, a 30% decrease in size! Deserialization performance is slightly slower, but seemingly within the range of noise locally for me. --- crates/wasmtime/src/module.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 89c4c12886..1b4967dfd4 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -2,6 +2,7 @@ use crate::frame_info::GlobalFrameInfoRegistration; use crate::runtime::{Config, Engine}; use crate::types::{EntityType, ExportType, ExternType, ImportType}; use anyhow::{bail, Context, Result}; +use bincode::Options; use std::path::Path; use std::sync::{Arc, Mutex}; use wasmparser::Validator; @@ -293,9 +294,7 @@ impl Module { self.compiled.to_compilation_artifacts(), ); - let mut buffer = Vec::new(); - bincode::serialize_into(&mut buffer, &artifacts)?; - + let buffer = bincode_options().serialize(&artifacts)?; Ok(buffer) } @@ -311,9 +310,9 @@ impl Module { pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result { let expected_fingerprint = compiler_fingerprint(engine.config()); - let (fingerprint, artifacts) = - bincode::deserialize_from::<_, (u64, CompilationArtifacts)>(serialized) - .context("Deserialize compilation artifacts")?; + let (fingerprint, artifacts) = bincode_options() + .deserialize::<(u64, CompilationArtifacts)>(serialized) + .context("Deserialize compilation artifacts")?; if fingerprint != expected_fingerprint { bail!("Incompatible compilation artifact"); } @@ -557,6 +556,17 @@ impl Module { } } +fn bincode_options() -> impl Options 
{ + // Use a variable-length integer encoding instead of fixed length. The + // module shown on #2318 gets compressed from ~160MB to ~110MB simply using + // this, presumably because there are a lot of 8-byte integers which generally + // have small values. Local testing shows that the deserialization + // cost, while higher, is in the few-percent range. For huge size + // savings this seems worthwhile to lose a small percentage of + // deserialization performance. + bincode::DefaultOptions::new().with_varint_encoding() +} + fn compiler_fingerprint(config: &Config) -> u64 { use std::hash::Hasher; let mut hasher = std::collections::hash_map::DefaultHasher::new();