Move wasm data/debuginfo into the ELF compilation image (#3235)

* Move wasm data/debuginfo into the ELF compilation image

This commit moves existing allocations of `Box<[u8]>` stored separately
from compilation's final ELF image into the ELF image itself. The goal
of this commit is to reduce the amount of data which `bincode` will need
to process in the future. DWARF debugging information and wasm data
segments can be quite large, and they're relatively rarely read, so
there's typically no need to copy them around. Instead by moving them
into the ELF image this opens up the opportunity in the future to
eliminate copies and use data directly as-found in the image itself.

For information accessed possibly-multiple times, such as the wasm data
ranges, the indexes of the data within the ELF image are computed when
a `CompiledModule` is created. These indexes are then used to directly
index into the image without having to root around in the ELF file each
time they're accessed.

One other change located here is that the symbolication context
previously cloned the debug information into it to adhere to the
`'static` lifetime safely, but this isn't actually ever used in
`wasmtime` right now so the unsafety around this has been removed and
instead borrowed data is returned (no more clones, yay!).

* Fix lightbeam
This commit is contained in:
Alex Crichton
2021-08-25 09:03:07 -05:00
committed by GitHub
parent a662f5361d
commit 7d05ebe7ff
12 changed files with 273 additions and 222 deletions

2
Cargo.lock generated
View File

@@ -3713,6 +3713,7 @@ dependencies = [
"indexmap", "indexmap",
"log", "log",
"more-asserts", "more-asserts",
"object",
"serde", "serde",
"target-lexicon", "target-lexicon",
"thiserror", "thiserror",
@@ -3798,6 +3799,7 @@ dependencies = [
"cranelift-codegen", "cranelift-codegen",
"gimli", "gimli",
"lightbeam", "lightbeam",
"object",
"target-lexicon", "target-lexicon",
"wasmparser", "wasmparser",
"wasmtime-environ", "wasmtime-environ",

View File

@@ -1,6 +1,6 @@
use crate::debug::ModuleMemoryOffset; use crate::debug::ModuleMemoryOffset;
use crate::func_environ::{get_func_name, FuncEnvironment}; use crate::func_environ::{get_func_name, FuncEnvironment};
use crate::obj::{ObjectBuilder, ObjectBuilderTarget}; use crate::obj::ObjectBuilder;
use crate::{ use crate::{
blank_sig, func_signature, indirect_signature, value_type, wasmtime_call_conv, blank_sig, func_signature, indirect_signature, value_type, wasmtime_call_conv,
CompiledFunction, Relocation, RelocationTarget, CompiledFunction, Relocation, RelocationTarget,
@@ -18,6 +18,7 @@ use cranelift_wasm::{
DefinedFuncIndex, DefinedMemoryIndex, FuncIndex, FuncTranslator, MemoryIndex, SignatureIndex, DefinedFuncIndex, DefinedMemoryIndex, FuncIndex, FuncTranslator, MemoryIndex, SignatureIndex,
WasmFuncType, WasmFuncType,
}; };
use object::write::Object;
use std::any::Any; use std::any::Any;
use std::cmp; use std::cmp;
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
@@ -221,7 +222,8 @@ impl wasmtime_environ::Compiler for Compiler {
types: &TypeTables, types: &TypeTables,
funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>, funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>,
emit_dwarf: bool, emit_dwarf: bool,
) -> Result<(Vec<u8>, PrimaryMap<DefinedFuncIndex, FunctionInfo>)> { obj: &mut Object,
) -> Result<PrimaryMap<DefinedFuncIndex, FunctionInfo>> {
const CODE_SECTION_ALIGNMENT: u64 = 0x1000; const CODE_SECTION_ALIGNMENT: u64 = 0x1000;
let funcs: crate::CompiledFunctions = funcs let funcs: crate::CompiledFunctions = funcs
.into_iter() .into_iter()
@@ -244,8 +246,7 @@ impl wasmtime_environ::Compiler for Compiler {
trampolines.push((i, func)); trampolines.push((i, func));
} }
let target = ObjectBuilderTarget::elf(self.isa.triple().architecture)?; let mut builder = ObjectBuilder::new(obj, &translation.module);
let mut builder = ObjectBuilder::new(target, &translation.module);
for (i, func) in funcs.iter() { for (i, func) in funcs.iter() {
builder.func(i, func); builder.func(i, func);
@@ -285,21 +286,24 @@ impl wasmtime_environ::Compiler for Compiler {
builder.dwarf_sections(&dwarf_sections)?; builder.dwarf_sections(&dwarf_sections)?;
} }
Ok(( builder.finish(&*self.isa)?;
builder.finish(&*self.isa)?, Ok(funcs.into_iter().map(|(_, f)| f.info).collect())
funcs.into_iter().map(|(_, f)| f.info).collect(),
))
} }
fn emit_trampoline_obj(&self, ty: &WasmFuncType, host_fn: usize) -> Result<Vec<u8>> { fn emit_trampoline_obj(
&self,
ty: &WasmFuncType,
host_fn: usize,
obj: &mut Object,
) -> Result<()> {
let host_to_wasm = self.host_to_wasm_trampoline(ty)?; let host_to_wasm = self.host_to_wasm_trampoline(ty)?;
let wasm_to_host = self.wasm_to_host_trampoline(ty, host_fn)?; let wasm_to_host = self.wasm_to_host_trampoline(ty, host_fn)?;
let target = ObjectBuilderTarget::elf(self.isa.triple().architecture)?;
let module = Module::new(); let module = Module::new();
let mut builder = ObjectBuilder::new(target, &module); let mut builder = ObjectBuilder::new(obj, &module);
builder.trampoline(SignatureIndex::new(0), &host_to_wasm); builder.trampoline(SignatureIndex::new(0), &host_to_wasm);
builder.trampoline(SignatureIndex::new(1), &wasm_to_host); builder.trampoline(SignatureIndex::new(1), &wasm_to_host);
Ok(builder.finish(&*self.isa)?) builder.finish(&*self.isa)?;
Ok(())
} }
fn triple(&self) -> &target_lexicon::Triple { fn triple(&self) -> &target_lexicon::Triple {

View File

@@ -29,8 +29,8 @@ use object::write::{
SymbolSection, SymbolSection,
}; };
use object::{ use object::{
elf, Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationKind, SectionKind, elf, Architecture, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind,
SymbolFlags, SymbolKind, SymbolScope, SymbolScope,
}; };
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::TryFrom; use std::convert::TryFrom;
@@ -39,22 +39,6 @@ use wasmtime_environ::{
DefinedFuncIndex, EntityRef, FuncIndex, Module, PrimaryMap, SignatureIndex, DefinedFuncIndex, EntityRef, FuncIndex, Module, PrimaryMap, SignatureIndex,
}; };
fn to_object_architecture(
arch: target_lexicon::Architecture,
) -> Result<Architecture, anyhow::Error> {
use target_lexicon::Architecture::*;
Ok(match arch {
X86_32(_) => Architecture::I386,
X86_64 => Architecture::X86_64,
Arm(_) => Architecture::Arm,
Aarch64(_) => Architecture::Aarch64,
S390x => Architecture::S390x,
architecture => {
anyhow::bail!("target architecture {:?} is unsupported", architecture,);
}
})
}
const TEXT_SECTION_NAME: &[u8] = b".text"; const TEXT_SECTION_NAME: &[u8] = b".text";
/// Iterates through all `LibCall` members and all runtime exported functions. /// Iterates through all `LibCall` members and all runtime exported functions.
@@ -106,27 +90,8 @@ fn write_libcall_symbols(obj: &mut Object) -> HashMap<LibCall, SymbolId> {
libcalls libcalls
} }
pub struct ObjectBuilderTarget {
pub(crate) binary_format: BinaryFormat,
pub(crate) architecture: Architecture,
pub(crate) endianness: Endianness,
}
impl ObjectBuilderTarget {
pub fn elf(arch: target_lexicon::Architecture) -> Result<Self> {
Ok(Self {
binary_format: BinaryFormat::Elf,
architecture: to_object_architecture(arch)?,
endianness: match arch.endianness().unwrap() {
target_lexicon::Endianness::Little => object::Endianness::Little,
target_lexicon::Endianness::Big => object::Endianness::Big,
},
})
}
}
pub struct ObjectBuilder<'a> { pub struct ObjectBuilder<'a> {
obj: Object, obj: &'a mut Object,
module: &'a Module, module: &'a Module,
text_section: SectionId, text_section: SectionId,
func_symbols: PrimaryMap<FuncIndex, SymbolId>, func_symbols: PrimaryMap<FuncIndex, SymbolId>,
@@ -150,9 +115,7 @@ struct RUNTIME_FUNCTION {
} }
impl<'a> ObjectBuilder<'a> { impl<'a> ObjectBuilder<'a> {
pub fn new(target: ObjectBuilderTarget, module: &'a Module) -> Self { pub fn new(obj: &'a mut Object, module: &'a Module) -> Self {
let mut obj = Object::new(target.binary_format, target.architecture, target.endianness);
// Entire code (functions and trampolines) will be placed // Entire code (functions and trampolines) will be placed
// in the ".text" section. // in the ".text" section.
let text_section = obj.add_section( let text_section = obj.add_section(
@@ -179,7 +142,7 @@ impl<'a> ObjectBuilder<'a> {
func_symbols.push(symbol_id); func_symbols.push(symbol_id);
} }
let libcalls = write_libcall_symbols(&mut obj); let libcalls = write_libcall_symbols(obj);
Self { Self {
obj, obj,
@@ -309,7 +272,7 @@ impl<'a> ObjectBuilder<'a> {
Ok(()) Ok(())
} }
pub fn finish(&mut self, isa: &dyn TargetIsa) -> Result<Vec<u8>> { pub fn finish(&mut self, isa: &dyn TargetIsa) -> Result<()> {
self.append_relocations()?; self.append_relocations()?;
if self.windows_unwind_info.len() > 0 { if self.windows_unwind_info.len() > 0 {
self.append_windows_unwind_info(); self.append_windows_unwind_info();
@@ -317,7 +280,7 @@ impl<'a> ObjectBuilder<'a> {
if self.systemv_unwind_info.len() > 0 { if self.systemv_unwind_info.len() > 0 {
self.append_systemv_unwind_info(isa); self.append_systemv_unwind_info(isa);
} }
Ok(self.obj.write()?) Ok(())
} }
fn append_relocations(&mut self) -> Result<()> { fn append_relocations(&mut self) -> Result<()> {

View File

@@ -22,6 +22,7 @@ log = { version = "0.4.8", default-features = false }
more-asserts = "0.2.1" more-asserts = "0.2.1"
cfg-if = "1.0" cfg-if = "1.0"
gimli = { version = "0.25.0", default-features = false, features = ['read'] } gimli = { version = "0.25.0", default-features = false, features = ['read'] }
object = { version = "0.26.0", default-features = false, features = ['write_core', 'elf'] }
target-lexicon = "0.12" target-lexicon = "0.12"
[badges] [badges]

View File

@@ -6,6 +6,8 @@ use crate::{
StackMap, Tunables, TypeTables, WasmError, WasmFuncType, StackMap, Tunables, TypeTables, WasmError, WasmFuncType,
}; };
use anyhow::Result; use anyhow::Result;
use object::write::Object;
use object::{Architecture, BinaryFormat};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::any::Any; use std::any::Any;
use std::borrow::Cow; use std::borrow::Cow;
@@ -184,21 +186,70 @@ pub trait Compiler: Send + Sync {
types: &TypeTables, types: &TypeTables,
) -> Result<Box<dyn Any + Send>, CompileError>; ) -> Result<Box<dyn Any + Send>, CompileError>;
/// Collects the results of compilation and emits an in-memory ELF object /// Collects the results of compilation into an in-memory object.
/// which is the serialized representation of all compiler artifacts.
/// ///
/// Note that ELF is used regardless of the target architecture. /// This function will receive the same `Box<dyn Any>` produced as part of
/// `compile_function`, as well as the general compilation environment with
/// the translation/types. This method is expected to populate information
/// in the object file such as:
///
/// * Compiled code in a `.text` section
/// * Unwind information in Wasmtime-specific sections
/// * DWARF debugging information for the host, if `emit_dwarf` is `true`
/// and the compiler supports it.
/// * Relocations, if necessary, for the text section
///
/// The final result of compilation will contain more sections inserted by
/// the compiler-agnostic runtime.
fn emit_obj( fn emit_obj(
&self, &self,
module: &ModuleTranslation, module: &ModuleTranslation,
types: &TypeTables, types: &TypeTables,
funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>, funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>,
emit_dwarf: bool, emit_dwarf: bool,
) -> Result<(Vec<u8>, PrimaryMap<DefinedFuncIndex, FunctionInfo>)>; obj: &mut Object,
) -> Result<PrimaryMap<DefinedFuncIndex, FunctionInfo>>;
/// Emits a small ELF object file in-memory which has two functions for the /// Inserts two functions for host-to-wasm and wasm-to-host trampolines into
/// host-to-wasm and wasm-to-host trampolines for the wasm type given. /// the `obj` provided.
fn emit_trampoline_obj(&self, ty: &WasmFuncType, host_fn: usize) -> Result<Vec<u8>>; ///
/// This will configure the same sections as `emit_obj`, but will likely be
/// much smaller.
fn emit_trampoline_obj(
&self,
ty: &WasmFuncType,
host_fn: usize,
obj: &mut Object,
) -> Result<()>;
/// Creates a new `Object` file which is used to build the results of a
/// compilation into.
///
/// The returned object file will have an appropriate
/// architecture/endianness for `self.triple()`, but at this time it is
/// always an ELF file, regardless of target platform.
fn object(&self) -> Result<Object> {
use target_lexicon::Architecture::*;
let triple = self.triple();
Ok(Object::new(
BinaryFormat::Elf,
match triple.architecture {
X86_32(_) => Architecture::I386,
X86_64 => Architecture::X86_64,
Arm(_) => Architecture::Arm,
Aarch64(_) => Architecture::Aarch64,
S390x => Architecture::S390x,
architecture => {
anyhow::bail!("target architecture {:?} is unsupported", architecture,);
}
},
match triple.endianness().unwrap() {
target_lexicon::Endianness::Little => object::Endianness::Little,
target_lexicon::Endianness::Big => object::Endianness::Big,
},
))
}
/// Returns the target triple that this compiler is compiling for. /// Returns the target triple that this compiler is compiling for.
fn triple(&self) -> &target_lexicon::Triple; fn triple(&self) -> &target_lexicon::Triple;

View File

@@ -38,14 +38,13 @@ impl Drop for CodeMemoryEntry {
} }
} }
pub struct CodeMemoryObjectAllocation<'a, 'b> { pub struct CodeMemoryObjectAllocation<'a> {
pub code_range: &'a mut [u8], pub code_range: &'a mut [u8],
funcs: BTreeMap<FuncIndex, (usize, usize)>, funcs: BTreeMap<FuncIndex, (usize, usize)>,
trampolines: BTreeMap<SignatureIndex, (usize, usize)>, trampolines: BTreeMap<SignatureIndex, (usize, usize)>,
pub obj: ObjectFile<'b>,
} }
impl<'a> CodeMemoryObjectAllocation<'a, '_> { impl<'a> CodeMemoryObjectAllocation<'a> {
pub fn funcs_len(&self) -> usize { pub fn funcs_len(&self) -> usize {
self.funcs.len() self.funcs.len()
} }
@@ -140,14 +139,22 @@ impl CodeMemory {
unsafe { &mut *body_ptr } unsafe { &mut *body_ptr }
} }
/// Variant of `allocate_for_object` for callers that only hold the raw bytes
/// of the object file rather than an already-parsed `ObjectFile`.
///
/// The bytes are parsed first (a parse failure is returned as the error) and
/// the parsed file is then handed to `allocate_for_object`.
pub fn allocate_for_object_unparsed<'a>(
    &'a mut self,
    obj: &[u8],
) -> Result<CodeMemoryObjectAllocation<'a>> {
    let parsed = ObjectFile::parse(obj)?;
    self.allocate_for_object(&parsed)
}
/// Allocates and copies the ELF image code section into CodeMemory. /// Allocates and copies the ELF image code section into CodeMemory.
/// Returns references to functions and trampolines defined there. /// Returns references to functions and trampolines defined there.
pub fn allocate_for_object<'a, 'b>( pub fn allocate_for_object<'a>(
&'a mut self, &'a mut self,
obj: &'b [u8], obj: &ObjectFile,
) -> Result<CodeMemoryObjectAllocation<'a, 'b>> { ) -> Result<CodeMemoryObjectAllocation<'a>> {
let obj = ObjectFile::parse(obj)
.with_context(|| "failed to parse internal ELF compilation artifact")?;
let text_section = obj.section_by_name(".text").unwrap(); let text_section = obj.section_by_name(".text").unwrap();
let text_section_size = text_section.size() as usize; let text_section_size = text_section.size() as usize;
@@ -157,7 +164,6 @@ impl CodeMemory {
code_range: &mut [], code_range: &mut [],
funcs: BTreeMap::new(), funcs: BTreeMap::new(),
trampolines: BTreeMap::new(), trampolines: BTreeMap::new(),
obj,
}); });
} }
@@ -212,7 +218,6 @@ impl CodeMemory {
code_range: &mut entry.mmap.as_mut_slice()[..text_section_size], code_range: &mut entry.mmap.as_mut_slice()[..text_section_size],
funcs, funcs,
trampolines, trampolines,
obj,
}) })
} }
} }

View File

@@ -6,15 +6,18 @@
use crate::code_memory::CodeMemory; use crate::code_memory::CodeMemory;
use crate::debug::create_gdbjit_image; use crate::debug::create_gdbjit_image;
use crate::link::link_module; use crate::link::link_module;
use anyhow::Result; use anyhow::{anyhow, Context, Result};
use object::read::File;
use object::write::{Object, StandardSegment};
use object::{Object as _, ObjectSection, SectionKind};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::ops::Range; use std::ops::Range;
use std::sync::Arc; use std::sync::Arc;
use thiserror::Error; use thiserror::Error;
use wasmtime_environ::{ use wasmtime_environ::{
CompileError, DebugInfoData, DefinedFuncIndex, FunctionInfo, InstanceSignature, CompileError, DefinedFuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, Module,
InstanceTypeIndex, Module, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex,
SignatureIndex, StackMapInformation, Tunables, WasmFuncType, StackMapInformation, Tunables, WasmFuncType,
}; };
use wasmtime_profiling::ProfilingAgent; use wasmtime_profiling::ProfilingAgent;
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline}; use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};
@@ -51,9 +54,6 @@ pub struct CompilationArtifacts {
/// ELF image with functions code. /// ELF image with functions code.
obj: Box<[u8]>, obj: Box<[u8]>,
/// All data segments referenced by this module, both active and passive.
wasm_data: Box<[u8]>,
/// Descriptions of compiled functions /// Descriptions of compiled functions
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>, funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
@@ -64,25 +64,10 @@ pub struct CompilationArtifacts {
/// we skipped and did not parse. /// we skipped and did not parse.
has_unparsed_debuginfo: bool, has_unparsed_debuginfo: bool,
/// Debug information found in the wasm file, used for symbolicating /// Offset in the original wasm file to the code section.
/// backtraces.
debug_info: Option<DebugInfo>,
}
#[derive(Serialize, Deserialize)]
struct DebugInfo {
data: Box<[u8]>,
code_section_offset: u64, code_section_offset: u64,
debug_abbrev: Range<usize>,
debug_addr: Range<usize>, has_wasm_debuginfo: bool,
debug_aranges: Range<usize>,
debug_info: Range<usize>,
debug_line: Range<usize>,
debug_line_str: Range<usize>,
debug_ranges: Range<usize>,
debug_rnglists: Range<usize>,
debug_str: Range<usize>,
debug_str_offsets: Range<usize>,
} }
impl CompilationArtifacts { impl CompilationArtifacts {
@@ -90,10 +75,10 @@ impl CompilationArtifacts {
/// compilation. /// compilation.
pub fn new( pub fn new(
translation: ModuleTranslation<'_>, translation: ModuleTranslation<'_>,
obj: Vec<u8>, mut obj: Object,
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>, funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
tunables: &Tunables, tunables: &Tunables,
) -> CompilationArtifacts { ) -> Result<CompilationArtifacts> {
let ModuleTranslation { let ModuleTranslation {
mut module, mut module,
debuginfo, debuginfo,
@@ -103,45 +88,67 @@ impl CompilationArtifacts {
.. ..
} = translation; } = translation;
// Concatenate all the wasm data together, placing both active and // Place all data from the wasm module into a section which will be the
// passive data into the same chunk of data. Note that this // source of the data later at runtime.
// implementation doesn't allow for unmapping or somehow releasing let segment = obj.segment_name(StandardSegment::Data).to_vec();
// passive data on `data.drop`, and if we want to do that in the future let section_id = obj.add_section(segment, b".wasmdata".to_vec(), SectionKind::ReadOnlyData);
// we'll have to change this to store passive data segments separately let mut total_data_len = 0;
// from the main data segments.
//
// Also note that here we have to update all passive data segments and
// their relative indices.
let wasm_data_size = data
.iter()
.map(|s| s.len())
.chain(passive_data.iter().map(|s| s.len()))
.sum();
let mut wasm_data = Vec::with_capacity(wasm_data_size);
for data in data.iter() { for data in data.iter() {
wasm_data.extend_from_slice(data); obj.append_section_data(section_id, data, 1);
total_data_len += data.len();
} }
let total_data_len = wasm_data.len();
for data in passive_data.iter() { for data in passive_data.iter() {
wasm_data.extend_from_slice(data); obj.append_section_data(section_id, data, 1);
} }
// Update passive data offsets since they're all located after the other
// data in the module.
for (_, range) in module.passive_data_map.iter_mut() { for (_, range) in module.passive_data_map.iter_mut() {
range.start = range.start.checked_add(total_data_len as u32).unwrap(); range.start = range.start.checked_add(total_data_len as u32).unwrap();
range.end = range.end.checked_add(total_data_len as u32).unwrap(); range.end = range.end.checked_add(total_data_len as u32).unwrap();
} }
CompilationArtifacts { // Insert the raw wasm-based debuginfo into the output, if
// requested. Note that this is distinct from the native debuginfo
// possibly generated by the native compiler, hence these sections
// getting wasm-specific names.
if tunables.parse_wasm_debuginfo {
push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev);
push_debug(&mut obj, &debuginfo.dwarf.debug_addr);
push_debug(&mut obj, &debuginfo.dwarf.debug_aranges);
push_debug(&mut obj, &debuginfo.dwarf.debug_info);
push_debug(&mut obj, &debuginfo.dwarf.debug_line);
push_debug(&mut obj, &debuginfo.dwarf.debug_line_str);
push_debug(&mut obj, &debuginfo.dwarf.debug_str);
push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets);
push_debug(&mut obj, &debuginfo.debug_ranges);
push_debug(&mut obj, &debuginfo.debug_rnglists);
}
return Ok(CompilationArtifacts {
module: Arc::new(module), module: Arc::new(module),
obj: obj.into_boxed_slice(), obj: obj.write()?.into(),
wasm_data: wasm_data.into(),
funcs, funcs,
native_debug_info_present: tunables.generate_native_debuginfo, native_debug_info_present: tunables.generate_native_debuginfo,
debug_info: if tunables.parse_wasm_debuginfo {
Some(debuginfo.into())
} else {
None
},
has_unparsed_debuginfo, has_unparsed_debuginfo,
code_section_offset: debuginfo.wasm_file.code_section_offset,
has_wasm_debuginfo: tunables.parse_wasm_debuginfo,
});
fn push_debug<'a, T>(obj: &mut Object, section: &T)
where
T: gimli::Section<gimli::EndianSlice<'a, gimli::LittleEndian>>,
{
if section.reader().slice().is_empty() {
return;
}
let segment = obj.segment_name(StandardSegment::Debug).to_vec();
let section_id = obj.add_section(
segment,
wasm_section_name(T::id()).as_bytes().to_vec(),
SectionKind::Debug,
);
obj.append_section_data(section_id, section.reader().slice(), 1);
} }
} }
} }
@@ -178,6 +185,7 @@ impl ModuleCode {
/// A compiled wasm module, ready to be instantiated. /// A compiled wasm module, ready to be instantiated.
pub struct CompiledModule { pub struct CompiledModule {
wasm_data: Range<usize>,
artifacts: CompilationArtifacts, artifacts: CompilationArtifacts,
code: Arc<ModuleCode>, code: Arc<ModuleCode>,
finished_functions: FinishedFunctions, finished_functions: FinishedFunctions,
@@ -189,11 +197,14 @@ impl CompiledModule {
pub fn from_artifacts( pub fn from_artifacts(
artifacts: CompilationArtifacts, artifacts: CompilationArtifacts,
profiler: &dyn ProfilingAgent, profiler: &dyn ProfilingAgent,
) -> Result<Arc<Self>, SetupError> { ) -> Result<Arc<Self>> {
let obj = File::parse(&artifacts.obj[..])
.with_context(|| "failed to parse internal ELF compilation artifact")?;
// Allocate all of the compiled functions into executable memory, // Allocate all of the compiled functions into executable memory,
// copying over their contents. // copying over their contents.
let (code_memory, code_range, finished_functions, trampolines) = let (code_memory, code_range, finished_functions, trampolines) =
build_code_memory(&artifacts.obj, &artifacts.module).map_err(|message| { build_code_memory(&obj, &artifacts.module).map_err(|message| {
SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!( SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!(
"failed to build code memory for functions: {}", "failed to build code memory for functions: {}",
message message
@@ -220,8 +231,14 @@ impl CompiledModule {
let start = code_range.0 as usize; let start = code_range.0 as usize;
let end = start + code_range.1; let end = start + code_range.1;
let data = obj
.section_by_name(".wasmdata")
.ok_or_else(|| anyhow!("failed to find internal data section for wasm module"))?;
let wasm_data = subslice_range(data.data()?, &artifacts.obj);
Ok(Arc::new(Self { Ok(Arc::new(Self {
artifacts, artifacts,
wasm_data,
code: Arc::new(ModuleCode { code: Arc::new(ModuleCode {
range: (start, end), range: (start, end),
code_memory, code_memory,
@@ -243,7 +260,7 @@ impl CompiledModule {
/// This is used for initialization of memories and all data ranges stored /// This is used for initialization of memories and all data ranges stored
/// in a `Module` are relative to the slice returned here. /// in a `Module` are relative to the slice returned here.
pub fn wasm_data(&self) -> &[u8] { pub fn wasm_data(&self) -> &[u8] {
&self.artifacts.wasm_data &self.artifacts.obj[self.wasm_data.clone()]
} }
/// Return a reference-counting pointer to a module. /// Return a reference-counting pointer to a module.
@@ -338,38 +355,25 @@ impl CompiledModule {
/// ///
/// Basically this makes a thing which parses debuginfo and can tell you /// Basically this makes a thing which parses debuginfo and can tell you
/// what filename and line number a wasm pc comes from. /// what filename and line number a wasm pc comes from.
pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext>, gimli::Error> { pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext<'_>>> {
use gimli::EndianSlice; use gimli::EndianSlice;
let info = match &self.artifacts.debug_info { if !self.artifacts.has_wasm_debuginfo {
Some(info) => info, return Ok(None);
None => return Ok(None), }
}; let obj = File::parse(&self.artifacts.obj[..])
// For now we clone the data into the `SymbolizeContext`, but if this .context("failed to parse internal ELF file representation")?;
// becomes prohibitive we could always `Arc` it with our own allocation let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
// here. let data = obj
let data = info.data.clone(); .section_by_name(wasm_section_name(id))
let endian = gimli::LittleEndian; .and_then(|s| s.data().ok())
let cx = addr2line::Context::from_sections( .unwrap_or(&[]);
EndianSlice::new(&data[info.debug_abbrev.clone()], endian).into(), Ok(EndianSlice::new(data, gimli::LittleEndian))
EndianSlice::new(&data[info.debug_addr.clone()], endian).into(), })?;
EndianSlice::new(&data[info.debug_aranges.clone()], endian).into(), let cx = addr2line::Context::from_dwarf(dwarf)
EndianSlice::new(&data[info.debug_info.clone()], endian).into(), .context("failed to create addr2line dwarf mapping context")?;
EndianSlice::new(&data[info.debug_line.clone()], endian).into(),
EndianSlice::new(&data[info.debug_line_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_ranges.clone()], endian).into(),
EndianSlice::new(&data[info.debug_rnglists.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str_offsets.clone()], endian).into(),
EndianSlice::new(&[], endian),
)?;
Ok(Some(SymbolizeContext { Ok(Some(SymbolizeContext {
// See comments on `SymbolizeContext` for why we do this static inner: cx,
// lifetime promotion. code_section_offset: self.artifacts.code_section_offset,
inner: unsafe {
std::mem::transmute::<Addr2LineContext<'_>, Addr2LineContext<'static>>(cx)
},
code_section_offset: info.code_section_offset,
_data: data,
})) }))
} }
@@ -384,27 +388,16 @@ type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::Lit
/// A context which contains dwarf debug information to translate program /// A context which contains dwarf debug information to translate program
/// counters back to filenames and line numbers. /// counters back to filenames and line numbers.
pub struct SymbolizeContext { pub struct SymbolizeContext<'a> {
// Note the `'static` lifetime on `inner`. That's actually a bunch of slices inner: Addr2LineContext<'a>,
// which point back into the `_data` field. We currently unsafely manage
// this by saying that when inside the struct it's `'static` (since we own
// the referenced data just next to it) and we only loan out borrowed
// references.
_data: Box<[u8]>,
inner: Addr2LineContext<'static>,
code_section_offset: u64, code_section_offset: u64,
} }
impl SymbolizeContext { impl<'a> SymbolizeContext<'a> {
/// Returns access to the [`addr2line::Context`] which can be used to query /// Returns access to the [`addr2line::Context`] which can be used to query
/// frame information with. /// frame information with.
pub fn addr2line(&self) -> &Addr2LineContext<'_> { pub fn addr2line(&self) -> &Addr2LineContext<'a> {
// Here we demote our synthetic `'static` lifetime which doesn't &self.inner
// actually exist back to a lifetime that's tied to `&self`, which
// should be safe.
unsafe {
std::mem::transmute::<&Addr2LineContext<'static>, &Addr2LineContext<'_>>(&self.inner)
}
} }
/// Returns the offset of the code section in the original wasm file, used /// Returns the offset of the code section in the original wasm file, used
@@ -429,7 +422,7 @@ fn create_dbg_image(
} }
fn build_code_memory( fn build_code_memory(
obj: &[u8], obj: &File,
module: &Module, module: &Module,
) -> Result<( ) -> Result<(
CodeMemory, CodeMemory,
@@ -469,7 +462,7 @@ fn build_code_memory(
trampolines.push((i, fnptr)); trampolines.push((i, fnptr));
} }
link_module(&allocation.obj, allocation.code_range); link_module(obj, allocation.code_range);
let code_range = (allocation.code_range.as_ptr(), allocation.code_range.len()); let code_range = (allocation.code_range.as_ptr(), allocation.code_range.len());
@@ -479,42 +472,6 @@ fn build_code_memory(
Ok((code_memory, code_range, finished_functions, trampolines)) Ok((code_memory, code_range, finished_functions, trampolines))
} }
impl From<DebugInfoData<'_>> for DebugInfo {
fn from(raw: DebugInfoData<'_>) -> DebugInfo {
use gimli::Section;
let mut data = Vec::new();
let mut push = |section: &[u8]| {
data.extend_from_slice(section);
data.len() - section.len()..data.len()
};
let debug_abbrev = push(raw.dwarf.debug_abbrev.reader().slice());
let debug_addr = push(raw.dwarf.debug_addr.reader().slice());
let debug_aranges = push(raw.dwarf.debug_aranges.reader().slice());
let debug_info = push(raw.dwarf.debug_info.reader().slice());
let debug_line = push(raw.dwarf.debug_line.reader().slice());
let debug_line_str = push(raw.dwarf.debug_line_str.reader().slice());
let debug_ranges = push(raw.debug_ranges.reader().slice());
let debug_rnglists = push(raw.debug_rnglists.reader().slice());
let debug_str = push(raw.dwarf.debug_str.reader().slice());
let debug_str_offsets = push(raw.dwarf.debug_str_offsets.reader().slice());
DebugInfo {
data: data.into(),
debug_abbrev,
debug_addr,
debug_aranges,
debug_info,
debug_line,
debug_line_str,
debug_ranges,
debug_rnglists,
debug_str,
debug_str_offsets,
code_section_offset: raw.wasm_file.code_section_offset,
}
}
}
mod arc_serde { mod arc_serde {
use super::Arc; use super::Arc;
use serde::{de::Deserialize, ser::Serialize, Deserializer, Serializer}; use serde::{de::Deserialize, ser::Serialize, Deserializer, Serializer};
@@ -535,3 +492,52 @@ mod arc_serde {
Ok(Arc::new(T::deserialize(de)?)) Ok(Arc::new(T::deserialize(de)?))
} }
} }
/// Computes where `inner` sits inside `outer`, as an index range such that
/// `outer[range]` denotes the same bytes as `inner`.
///
/// An empty `inner` always yields `0..0`, regardless of where it points.
///
/// # Panics
///
/// Panics if a non-empty `inner` does not lie entirely within `outer`.
fn subslice_range(inner: &[u8], outer: &[u8]) -> Range<usize> {
    if inner.is_empty() {
        return 0..0;
    }

    // Compare the address spans of both slices: `inner` must begin at or
    // after the start of `outer` and must end at or before its end.
    let outer_span = outer.as_ptr_range();
    let inner_span = inner.as_ptr_range();
    assert!(outer_span.start <= inner_span.start);
    assert!(inner_span.end <= outer_span.end);

    let offset = inner_span.start as usize - outer_span.start as usize;
    offset..offset + inner.len()
}
/// Returns the Wasmtime-specific section name for dwarf debugging sections.
///
/// These sections, if configured in Wasmtime, will contain the original raw
/// dwarf debugging information found in the wasm file, unmodified. These tables
/// are then consulted later to convert wasm program counters to original wasm
/// source filenames/line numbers with `addr2line`.
fn wasm_section_name(id: gimli::SectionId) -> &'static str {
use gimli::SectionId::*;
match id {
DebugAbbrev => ".debug_abbrev.wasm",
DebugAddr => ".debug_addr.wasm",
DebugAranges => ".debug_aranges.wasm",
DebugFrame => ".debug_frame.wasm",
EhFrame => ".eh_frame.wasm",
EhFrameHdr => ".eh_frame_hdr.wasm",
DebugInfo => ".debug_info.wasm",
DebugLine => ".debug_line.wasm",
DebugLineStr => ".debug_line_str.wasm",
DebugLoc => ".debug_loc.wasm",
DebugLocLists => ".debug_loc_lists.wasm",
DebugMacinfo => ".debug_macinfo.wasm",
DebugMacro => ".debug_macro.wasm",
DebugPubNames => ".debug_pub_names.wasm",
DebugPubTypes => ".debug_pub_types.wasm",
DebugRanges => ".debug_ranges.wasm",
DebugRngLists => ".debug_rng_lists.wasm",
DebugStr => ".debug_str.wasm",
DebugStrOffsets => ".debug_str_offsets.wasm",
DebugTypes => ".debug_types.wasm",
}
}

View File

@@ -19,3 +19,4 @@ lightbeam = { path = "..", version = "0.29.0" }
wasmparser = "0.80" wasmparser = "0.80"
cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.76.0" } cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.76.0" }
wasmtime-environ = { path = "../../environ", version = "0.29.0" } wasmtime-environ = { path = "../../environ", version = "0.29.0" }
object = { version = "0.26.0", default-features = false }

View File

@@ -8,6 +8,7 @@
use anyhow::Result; use anyhow::Result;
use cranelift_codegen::binemit; use cranelift_codegen::binemit;
use cranelift_codegen::ir::{self, ExternalName}; use cranelift_codegen::ir::{self, ExternalName};
use object::write::Object;
use std::any::Any; use std::any::Any;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use wasmtime_environ::{ use wasmtime_environ::{
@@ -84,11 +85,17 @@ impl Compiler for Lightbeam {
_types: &TypeTables, _types: &TypeTables,
_funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>, _funcs: PrimaryMap<DefinedFuncIndex, Box<dyn Any + Send>>,
_emit_dwarf: bool, _emit_dwarf: bool,
) -> Result<(Vec<u8>, PrimaryMap<DefinedFuncIndex, FunctionInfo>)> { _obj: &mut Object,
) -> Result<PrimaryMap<DefinedFuncIndex, FunctionInfo>> {
unimplemented!() unimplemented!()
} }
fn emit_trampoline_obj(&self, _ty: &WasmFuncType, _host_fn: usize) -> Result<Vec<u8>> { fn emit_trampoline_obj(
&self,
_ty: &WasmFuncType,
_host_fn: usize,
_obj: &mut Object,
) -> Result<()> {
unimplemented!() unimplemented!()
} }

View File

@@ -369,11 +369,13 @@ impl Module {
.into_iter() .into_iter()
.collect(); .collect();
let (obj, funcs) = engine.compiler().emit_obj( let mut obj = engine.compiler().object()?;
let funcs = engine.compiler().emit_obj(
&translation, &translation,
&types, &types,
funcs, funcs,
tunables.generate_native_debuginfo, tunables.generate_native_debuginfo,
&mut obj,
)?; )?;
// If configured, attempt to use paged memory initialization // If configured, attempt to use paged memory initialization
@@ -382,7 +384,12 @@ impl Module {
translation.try_paged_init(); translation.try_paged_init();
} }
Ok(CompilationArtifacts::new(translation, obj, funcs, tunables)) Ok(CompilationArtifacts::new(
translation,
obj,
funcs,
tunables,
)?)
})?; })?;
Ok(( Ok((

View File

@@ -76,12 +76,14 @@ pub fn create_function(
func: Box<dyn Fn(*mut VMContext, *mut u128) -> Result<(), Trap> + Send + Sync>, func: Box<dyn Fn(*mut VMContext, *mut u128) -> Result<(), Trap> + Send + Sync>,
engine: &Engine, engine: &Engine,
) -> Result<(InstanceHandle, VMTrampoline)> { ) -> Result<(InstanceHandle, VMTrampoline)> {
let obj = engine let mut obj = engine.compiler().object()?;
engine
.compiler() .compiler()
.emit_trampoline_obj(ft.as_wasm_func_type(), stub_fn as usize)?; .emit_trampoline_obj(ft.as_wasm_func_type(), stub_fn as usize, &mut obj)?;
let obj = obj.write()?;
let mut code_memory = CodeMemory::new(); let mut code_memory = CodeMemory::new();
let alloc = code_memory.allocate_for_object(&obj)?; let alloc = code_memory.allocate_for_object_unparsed(&obj)?;
let mut trampolines = alloc.trampolines(); let mut trampolines = alloc.trampolines();
let (host_i, host_trampoline) = trampolines.next().unwrap(); let (host_i, host_trampoline) = trampolines.next().unwrap();
assert_eq!(host_i.as_u32(), 0); assert_eq!(host_i.as_u32(), 0);

View File

@@ -55,11 +55,13 @@ pub fn compile_to_obj(
for (index, func) in mem::take(&mut translation[0].function_body_inputs) { for (index, func) in mem::take(&mut translation[0].function_body_inputs) {
funcs.push(compiler.compile_function(&translation[0], index, func, &tunables, &types)?); funcs.push(compiler.compile_function(&translation[0], index, func, &tunables, &types)?);
} }
let (obj, _) = compiler.emit_obj( let mut obj = compiler.object()?;
compiler.emit_obj(
&translation[0], &translation[0],
&types, &types,
funcs, funcs,
tunables.generate_native_debuginfo, tunables.generate_native_debuginfo,
&mut obj,
)?; )?;
Ok(obj) Ok(obj.write()?)
} }