Further compress the in-memory representation of address maps (#2324)
This commit reduces the size of `InstructionAddressMap` from 24 bytes to 8 bytes by dropping the `code_len` field and reducing `code_offset` to `u32` instead of `usize`. The intention is to primarily make the in-memory version take up less space, and the hunch is that the `code_len` is largely not necessary since most entries in this map are always adjacent to one another. The `code_len` field is now implied by the `code_offset` field of the next entry in the map. This isn't as big of an improvement to serialized module size as #2321 or #2322, primarily because of the switch to variable-length encoding. Despite this though it shaves about 10MB off the encoded size of the module from #2318
This commit is contained in:
@@ -229,19 +229,28 @@ impl StackMapSink {
|
|||||||
fn get_function_address_map<'data>(
|
fn get_function_address_map<'data>(
|
||||||
context: &Context,
|
context: &Context,
|
||||||
data: &FunctionBodyData<'data>,
|
data: &FunctionBodyData<'data>,
|
||||||
body_len: usize,
|
body_len: u32,
|
||||||
isa: &dyn isa::TargetIsa,
|
isa: &dyn isa::TargetIsa,
|
||||||
) -> FunctionAddressMap {
|
) -> FunctionAddressMap {
|
||||||
|
// Generate artificial srcloc for function start/end to identify boundary
|
||||||
|
// within module.
|
||||||
|
let data = data.body.get_binary_reader();
|
||||||
|
let offset = data.original_position();
|
||||||
|
let len = data.bytes_remaining();
|
||||||
|
assert!((offset + len) <= u32::max_value() as usize);
|
||||||
|
let start_srcloc = ir::SourceLoc::new(offset as u32);
|
||||||
|
let end_srcloc = ir::SourceLoc::new((offset + len) as u32);
|
||||||
|
|
||||||
let instructions = if let Some(ref mcr) = &context.mach_compile_result {
|
let instructions = if let Some(ref mcr) = &context.mach_compile_result {
|
||||||
// New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping
|
// New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping
|
||||||
// tuples.
|
// tuples.
|
||||||
collect_address_maps(mcr.buffer.get_srclocs_sorted().into_iter().map(
|
collect_address_maps(
|
||||||
|&MachSrcLoc { start, end, loc }| InstructionAddressMap {
|
body_len,
|
||||||
srcloc: loc,
|
mcr.buffer
|
||||||
code_offset: start as usize,
|
.get_srclocs_sorted()
|
||||||
code_len: (end - start) as usize,
|
.into_iter()
|
||||||
},
|
.map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))),
|
||||||
))
|
)
|
||||||
} else {
|
} else {
|
||||||
// Old-style backend: we need to traverse the instruction/encoding info in the function.
|
// Old-style backend: we need to traverse the instruction/encoding info in the function.
|
||||||
let func = &context.func;
|
let func = &context.func;
|
||||||
@@ -250,28 +259,16 @@ fn get_function_address_map<'data>(
|
|||||||
|
|
||||||
let encinfo = isa.encoding_info();
|
let encinfo = isa.encoding_info();
|
||||||
collect_address_maps(
|
collect_address_maps(
|
||||||
|
body_len,
|
||||||
blocks
|
blocks
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.flat_map(|block| func.inst_offsets(block, &encinfo))
|
.flat_map(|block| func.inst_offsets(block, &encinfo))
|
||||||
.map(|(offset, inst, size)| InstructionAddressMap {
|
.map(|(offset, inst, size)| (func.srclocs[inst], offset, size)),
|
||||||
srcloc: func.srclocs[inst],
|
|
||||||
code_offset: offset as usize,
|
|
||||||
code_len: size as usize,
|
|
||||||
}),
|
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generate artificial srcloc for function start/end to identify boundary
|
|
||||||
// within module. Similar to FuncTranslator::cur_srcloc(): it will wrap around
|
|
||||||
// if byte code is larger than 4 GB.
|
|
||||||
let data = data.body.get_binary_reader();
|
|
||||||
let offset = data.original_position();
|
|
||||||
let len = data.bytes_remaining();
|
|
||||||
let start_srcloc = ir::SourceLoc::new(offset as u32);
|
|
||||||
let end_srcloc = ir::SourceLoc::new((offset + len) as u32);
|
|
||||||
|
|
||||||
FunctionAddressMap {
|
FunctionAddressMap {
|
||||||
instructions,
|
instructions: instructions.into(),
|
||||||
start_srcloc,
|
start_srcloc,
|
||||||
end_srcloc,
|
end_srcloc,
|
||||||
body_offset: 0,
|
body_offset: 0,
|
||||||
@@ -283,23 +280,54 @@ fn get_function_address_map<'data>(
|
|||||||
// into a `FunctionAddressMap`. This will automatically coalesce adjacent
|
// into a `FunctionAddressMap`. This will automatically coalesce adjacent
|
||||||
// instructions which map to the same original source position.
|
// instructions which map to the same original source position.
|
||||||
fn collect_address_maps(
|
fn collect_address_maps(
|
||||||
iter: impl IntoIterator<Item = InstructionAddressMap>,
|
code_size: u32,
|
||||||
|
iter: impl IntoIterator<Item = (ir::SourceLoc, u32, u32)>,
|
||||||
) -> Vec<InstructionAddressMap> {
|
) -> Vec<InstructionAddressMap> {
|
||||||
let mut iter = iter.into_iter();
|
let mut iter = iter.into_iter();
|
||||||
let mut cur = match iter.next() {
|
let (mut cur_loc, mut cur_offset, mut cur_len) = match iter.next() {
|
||||||
Some(i) => i,
|
Some(i) => i,
|
||||||
None => return Vec::new(),
|
None => return Vec::new(),
|
||||||
};
|
};
|
||||||
let mut ret = Vec::new();
|
let mut ret = Vec::new();
|
||||||
for item in iter {
|
for (loc, offset, len) in iter {
|
||||||
if cur.code_offset + cur.code_len == item.code_offset && item.srcloc == cur.srcloc {
|
// If this instruction is adjacent to the previous and has the same
|
||||||
cur.code_len += item.code_len;
|
// source location then we can "coalesce" it with the current
|
||||||
} else {
|
// instruction.
|
||||||
ret.push(cur);
|
if cur_offset + cur_len == offset && loc == cur_loc {
|
||||||
cur = item;
|
cur_len += len;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Push an entry for the previous source item.
|
||||||
|
ret.push(InstructionAddressMap {
|
||||||
|
srcloc: cur_loc,
|
||||||
|
code_offset: cur_offset,
|
||||||
|
});
|
||||||
|
// And push a "dummy" entry if necessary to cover the span of ranges,
|
||||||
|
// if any, between the previous source offset and this one.
|
||||||
|
if cur_offset + cur_len != offset {
|
||||||
|
ret.push(InstructionAddressMap {
|
||||||
|
srcloc: ir::SourceLoc::default(),
|
||||||
|
code_offset: cur_offset + cur_len,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Update our current location to get extended later or pushed on at
|
||||||
|
// the end.
|
||||||
|
cur_loc = loc;
|
||||||
|
cur_offset = offset;
|
||||||
|
cur_len = len;
|
||||||
}
|
}
|
||||||
ret.push(cur);
|
ret.push(InstructionAddressMap {
|
||||||
|
srcloc: cur_loc,
|
||||||
|
code_offset: cur_offset,
|
||||||
|
});
|
||||||
|
if cur_offset + cur_len != code_size {
|
||||||
|
ret.push(InstructionAddressMap {
|
||||||
|
srcloc: ir::SourceLoc::default(),
|
||||||
|
code_offset: cur_offset + cur_len,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -406,7 +434,8 @@ impl Compiler for Cranelift {
|
|||||||
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let address_transform = get_function_address_map(&context, &input, code_buf.len(), isa);
|
let address_transform =
|
||||||
|
get_function_address_map(&context, &input, code_buf.len() as u32, isa);
|
||||||
|
|
||||||
let ranges = if tunables.debug_info {
|
let ranges = if tunables.debug_info {
|
||||||
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
|
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ fn build_function_lookup(
|
|||||||
let mut ranges_index = BTreeMap::new();
|
let mut ranges_index = BTreeMap::new();
|
||||||
let mut current_range = Vec::new();
|
let mut current_range = Vec::new();
|
||||||
let mut last_gen_inst_empty = false;
|
let mut last_gen_inst_empty = false;
|
||||||
for t in &ft.instructions {
|
for (i, t) in ft.instructions.iter().enumerate() {
|
||||||
if t.srcloc.is_default() {
|
if t.srcloc.is_default() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -111,8 +111,11 @@ fn build_function_lookup(
|
|||||||
assert_le!(fn_start, offset);
|
assert_le!(fn_start, offset);
|
||||||
assert_le!(offset, fn_end);
|
assert_le!(offset, fn_end);
|
||||||
|
|
||||||
let inst_gen_start = t.code_offset;
|
let inst_gen_start = t.code_offset as usize;
|
||||||
let inst_gen_end = t.code_offset + t.code_len;
|
let inst_gen_end = match ft.instructions.get(i + 1) {
|
||||||
|
Some(i) => i.code_offset as usize,
|
||||||
|
None => ft.body_len as usize,
|
||||||
|
};
|
||||||
|
|
||||||
if last_wasm_pos > offset {
|
if last_wasm_pos > offset {
|
||||||
// Start new range.
|
// Start new range.
|
||||||
@@ -149,7 +152,7 @@ fn build_function_lookup(
|
|||||||
}
|
}
|
||||||
last_wasm_pos = offset;
|
last_wasm_pos = offset;
|
||||||
}
|
}
|
||||||
let last_gen_addr = ft.body_offset + ft.body_len;
|
let last_gen_addr = ft.body_offset + ft.body_len as usize;
|
||||||
ranges_index.insert(range_wasm_start, ranges.len());
|
ranges_index.insert(range_wasm_start, ranges.len());
|
||||||
ranges.push(Range {
|
ranges.push(Range {
|
||||||
wasm_start: range_wasm_start,
|
wasm_start: range_wasm_start,
|
||||||
@@ -193,13 +196,13 @@ fn build_function_addr_map(
|
|||||||
for (_, f) in funcs {
|
for (_, f) in funcs {
|
||||||
let ft = &f.address_map;
|
let ft = &f.address_map;
|
||||||
let mut fn_map = Vec::new();
|
let mut fn_map = Vec::new();
|
||||||
for t in &ft.instructions {
|
for t in ft.instructions.iter() {
|
||||||
if t.srcloc.is_default() {
|
if t.srcloc.is_default() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let offset = get_wasm_code_offset(t.srcloc, code_section_offset);
|
let offset = get_wasm_code_offset(t.srcloc, code_section_offset);
|
||||||
fn_map.push(AddressMap {
|
fn_map.push(AddressMap {
|
||||||
generated: t.code_offset,
|
generated: t.code_offset as usize,
|
||||||
wasm: offset,
|
wasm: offset,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -213,7 +216,7 @@ fn build_function_addr_map(
|
|||||||
|
|
||||||
map.push(FunctionMap {
|
map.push(FunctionMap {
|
||||||
offset: ft.body_offset,
|
offset: ft.body_offset,
|
||||||
len: ft.body_len,
|
len: ft.body_len as usize,
|
||||||
wasm_start: get_wasm_code_offset(ft.start_srcloc, code_section_offset),
|
wasm_start: get_wasm_code_offset(ft.start_srcloc, code_section_offset),
|
||||||
wasm_end: get_wasm_code_offset(ft.end_srcloc, code_section_offset),
|
wasm_end: get_wasm_code_offset(ft.end_srcloc, code_section_offset),
|
||||||
addresses: fn_map.into_boxed_slice(),
|
addresses: fn_map.into_boxed_slice(),
|
||||||
@@ -605,6 +608,7 @@ mod tests {
|
|||||||
use super::{build_function_lookup, get_wasm_code_offset, AddressTransform};
|
use super::{build_function_lookup, get_wasm_code_offset, AddressTransform};
|
||||||
use gimli::write::Address;
|
use gimli::write::Address;
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
use std::mem;
|
||||||
use wasmtime_environ::entity::PrimaryMap;
|
use wasmtime_environ::entity::PrimaryMap;
|
||||||
use wasmtime_environ::ir::SourceLoc;
|
use wasmtime_environ::ir::SourceLoc;
|
||||||
use wasmtime_environ::{CompiledFunction, WasmFileInfo};
|
use wasmtime_environ::{CompiledFunction, WasmFileInfo};
|
||||||
@@ -626,14 +630,21 @@ mod tests {
|
|||||||
InstructionAddressMap {
|
InstructionAddressMap {
|
||||||
srcloc: SourceLoc::new(wasm_offset + 2),
|
srcloc: SourceLoc::new(wasm_offset + 2),
|
||||||
code_offset: 5,
|
code_offset: 5,
|
||||||
code_len: 3,
|
},
|
||||||
|
InstructionAddressMap {
|
||||||
|
srcloc: SourceLoc::default(),
|
||||||
|
code_offset: 8,
|
||||||
},
|
},
|
||||||
InstructionAddressMap {
|
InstructionAddressMap {
|
||||||
srcloc: SourceLoc::new(wasm_offset + 7),
|
srcloc: SourceLoc::new(wasm_offset + 7),
|
||||||
code_offset: 15,
|
code_offset: 15,
|
||||||
code_len: 8,
|
|
||||||
},
|
},
|
||||||
],
|
InstructionAddressMap {
|
||||||
|
srcloc: SourceLoc::default(),
|
||||||
|
code_offset: 23,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
.into(),
|
||||||
start_srcloc: SourceLoc::new(wasm_offset),
|
start_srcloc: SourceLoc::new(wasm_offset),
|
||||||
end_srcloc: SourceLoc::new(wasm_offset + 10),
|
end_srcloc: SourceLoc::new(wasm_offset + 10),
|
||||||
body_offset: 0,
|
body_offset: 0,
|
||||||
@@ -678,11 +689,16 @@ mod tests {
|
|||||||
fn test_build_function_lookup_two_ranges() {
|
fn test_build_function_lookup_two_ranges() {
|
||||||
let mut input = create_simple_func(11);
|
let mut input = create_simple_func(11);
|
||||||
// append instruction with same srcloc as input.instructions[0]
|
// append instruction with same srcloc as input.instructions[0]
|
||||||
input.instructions.push(InstructionAddressMap {
|
let mut list = Vec::from(mem::take(&mut input.instructions));
|
||||||
|
list.push(InstructionAddressMap {
|
||||||
srcloc: SourceLoc::new(11 + 2),
|
srcloc: SourceLoc::new(11 + 2),
|
||||||
code_offset: 23,
|
code_offset: 23,
|
||||||
code_len: 3,
|
|
||||||
});
|
});
|
||||||
|
list.push(InstructionAddressMap {
|
||||||
|
srcloc: SourceLoc::default(),
|
||||||
|
code_offset: 26,
|
||||||
|
});
|
||||||
|
input.instructions = list.into();
|
||||||
let (start, end, lookup) = build_function_lookup(&input, 1);
|
let (start, end, lookup) = build_function_lookup(&input, 1);
|
||||||
assert_eq!(10, start);
|
assert_eq!(10, start);
|
||||||
assert_eq!(20, end);
|
assert_eq!(20, end);
|
||||||
|
|||||||
@@ -1145,14 +1145,21 @@ mod tests {
|
|||||||
InstructionAddressMap {
|
InstructionAddressMap {
|
||||||
srcloc: SourceLoc::new(code_section_offset + 12),
|
srcloc: SourceLoc::new(code_section_offset + 12),
|
||||||
code_offset: 5,
|
code_offset: 5,
|
||||||
code_len: 3,
|
},
|
||||||
|
InstructionAddressMap {
|
||||||
|
srcloc: SourceLoc::default(),
|
||||||
|
code_offset: 8,
|
||||||
},
|
},
|
||||||
InstructionAddressMap {
|
InstructionAddressMap {
|
||||||
srcloc: SourceLoc::new(code_section_offset + 17),
|
srcloc: SourceLoc::new(code_section_offset + 17),
|
||||||
code_offset: 15,
|
code_offset: 15,
|
||||||
code_len: 8,
|
|
||||||
},
|
},
|
||||||
],
|
InstructionAddressMap {
|
||||||
|
srcloc: SourceLoc::default(),
|
||||||
|
code_offset: 23,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
.into(),
|
||||||
start_srcloc: SourceLoc::new(code_section_offset + 10),
|
start_srcloc: SourceLoc::new(code_section_offset + 10),
|
||||||
end_srcloc: SourceLoc::new(code_section_offset + 20),
|
end_srcloc: SourceLoc::new(code_section_offset + 20),
|
||||||
body_offset: 0,
|
body_offset: 0,
|
||||||
|
|||||||
@@ -7,22 +7,24 @@ use serde::{Deserialize, Serialize};
|
|||||||
/// Single source location to generated address mapping.
|
/// Single source location to generated address mapping.
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct InstructionAddressMap {
|
pub struct InstructionAddressMap {
|
||||||
/// Original source location.
|
/// Where in the source this instruction comes from.
|
||||||
pub srcloc: ir::SourceLoc,
|
pub srcloc: ir::SourceLoc,
|
||||||
|
|
||||||
/// Generated instructions offset.
|
/// Offset from the start of the function's compiled code to where this
|
||||||
pub code_offset: usize,
|
/// instruction is located, or the region where it starts.
|
||||||
|
pub code_offset: u32,
|
||||||
/// Generated instructions length.
|
|
||||||
pub code_len: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Function and its instructions addresses mappings.
|
/// Function and its instructions addresses mappings.
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||||
pub struct FunctionAddressMap {
|
pub struct FunctionAddressMap {
|
||||||
/// Instructions maps.
|
/// An array of data for the instructions in this function, indicating where
|
||||||
/// The array is sorted by the InstructionAddressMap::code_offset field.
|
/// each instruction maps back to in the original function.
|
||||||
pub instructions: Vec<InstructionAddressMap>,
|
///
|
||||||
|
/// This array is sorted least-to-greatest by the `code_offset` field.
|
||||||
|
/// Additionally the span of each `InstructionAddressMap` is implicitly the
|
||||||
|
/// gap between it and the next item in the array.
|
||||||
|
pub instructions: Box<[InstructionAddressMap]>,
|
||||||
|
|
||||||
/// Function start source location (normally declaration).
|
/// Function start source location (normally declaration).
|
||||||
pub start_srcloc: ir::SourceLoc,
|
pub start_srcloc: ir::SourceLoc,
|
||||||
@@ -34,7 +36,7 @@ pub struct FunctionAddressMap {
|
|||||||
pub body_offset: usize,
|
pub body_offset: usize,
|
||||||
|
|
||||||
/// Generated function body length.
|
/// Generated function body length.
|
||||||
pub body_len: usize,
|
pub body_len: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Memory definition offset in the VMContext structure.
|
/// Memory definition offset in the VMContext structure.
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ impl GlobalFrameInfo {
|
|||||||
// Use our relative position from the start of the function to find the
|
// Use our relative position from the start of the function to find the
|
||||||
// machine instruction that corresponds to `pc`, which then allows us to
|
// machine instruction that corresponds to `pc`, which then allows us to
|
||||||
// map that to a wasm original source location.
|
// map that to a wasm original source location.
|
||||||
let rel_pos = pc - func.start;
|
let rel_pos = (pc - func.start) as u32;
|
||||||
let pos = match func
|
let pos = match func
|
||||||
.instr_map
|
.instr_map
|
||||||
.instructions
|
.instructions
|
||||||
@@ -77,19 +77,8 @@ impl GlobalFrameInfo {
|
|||||||
// instructions cover `pc`.
|
// instructions cover `pc`.
|
||||||
Err(0) => None,
|
Err(0) => None,
|
||||||
|
|
||||||
// This would be at the `nth` slot, so check `n-1` to see if we're
|
// This would be at the `nth` slot, so we're at the `n-1`th slot.
|
||||||
// part of that instruction. This happens due to the minus one when
|
Err(n) => Some(n - 1),
|
||||||
// this function is called form trap symbolication, where we don't
|
|
||||||
// always get called with a `pc` that's an exact instruction
|
|
||||||
// boundary.
|
|
||||||
Err(n) => {
|
|
||||||
let instr = &func.instr_map.instructions[n - 1];
|
|
||||||
if instr.code_offset <= rel_pos && rel_pos < instr.code_offset + instr.code_len {
|
|
||||||
Some(n - 1)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// In debug mode for now assert that we found a mapping for `pc` within
|
// In debug mode for now assert that we found a mapping for `pc` within
|
||||||
|
|||||||
Reference in New Issue
Block a user