Encode modules with variable-length integers (#2322)
Update `Module::{serialize,deserialize}` to use variable-length integers
with `bincode` to make the output artifacts smaller. Locally this
reduces the size of the module from #2318 from 160 to 110 MB, a roughly 30% decrease!
Deserialization is slightly slower, but the difference is seemingly within the
range of noise in my local measurements.
This commit is contained in:
@@ -2,6 +2,7 @@ use crate::frame_info::GlobalFrameInfoRegistration;
|
|||||||
use crate::runtime::{Config, Engine};
|
use crate::runtime::{Config, Engine};
|
||||||
use crate::types::{EntityType, ExportType, ExternType, ImportType};
|
use crate::types::{EntityType, ExportType, ExternType, ImportType};
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
|
use bincode::Options;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use wasmparser::Validator;
|
use wasmparser::Validator;
|
||||||
@@ -293,9 +294,7 @@ impl Module {
|
|||||||
self.compiled.to_compilation_artifacts(),
|
self.compiled.to_compilation_artifacts(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let buffer = bincode_options().serialize(&artifacts)?;
|
||||||
bincode::serialize_into(&mut buffer, &artifacts)?;
|
|
||||||
|
|
||||||
Ok(buffer)
|
Ok(buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -311,9 +310,9 @@ impl Module {
|
|||||||
pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result<Module> {
|
pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result<Module> {
|
||||||
let expected_fingerprint = compiler_fingerprint(engine.config());
|
let expected_fingerprint = compiler_fingerprint(engine.config());
|
||||||
|
|
||||||
let (fingerprint, artifacts) =
|
let (fingerprint, artifacts) = bincode_options()
|
||||||
bincode::deserialize_from::<_, (u64, CompilationArtifacts)>(serialized)
|
.deserialize::<(u64, CompilationArtifacts)>(serialized)
|
||||||
.context("Deserialize compilation artifacts")?;
|
.context("Deserialize compilation artifacts")?;
|
||||||
if fingerprint != expected_fingerprint {
|
if fingerprint != expected_fingerprint {
|
||||||
bail!("Incompatible compilation artifact");
|
bail!("Incompatible compilation artifact");
|
||||||
}
|
}
|
||||||
@@ -557,6 +556,17 @@ impl Module {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn bincode_options() -> impl Options {
|
||||||
|
// Use a variable-length integer encoding instead of fixed length. The
|
||||||
|
// module shown on #2318 gets compressed from ~160MB to ~110MB simply using
|
||||||
|
// this, presumably because there are a lot of 8-byte integers which generally
|
||||||
|
// have small values. Local testing shows that the deserialization
|
||||||
|
// overhead, while higher, is in the few-percent range. For huge size
|
||||||
|
// savings this seems worthwhile to lose a small percentage of
|
||||||
|
// deserialization performance.
|
||||||
|
bincode::DefaultOptions::new().with_varint_encoding()
|
||||||
|
}
|
||||||
|
|
||||||
fn compiler_fingerprint(config: &Config) -> u64 {
|
fn compiler_fingerprint(config: &Config) -> u64 {
|
||||||
use std::hash::Hasher;
|
use std::hash::Hasher;
|
||||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||||
|
|||||||
Reference in New Issue
Block a user