Rewrite gdbjit support with safety and fewer deps (#3246)

This refactoring primarily removes the dependency of the gdbjit image creation on the `finished_functions` array, which shouldn't be necessary given the input object being passed in since information can be read from the object instead. Additionally, though, this commit also removes all `unsafe` from the file, relying on various tools in the `object` crate to parse the internals and update various fields.
2021-08-26 10:44:05 -05:00
parent 78c1e4032f
commit def394eca2
2 changed files with 53 additions and 116 deletions
--- a/crates/jit/src/debug.rs
+++ b/crates/jit/src/debug.rs
@@ -1,18 +1,21 @@
-use anyhow::{bail, ensure, Error};
+use anyhow::{anyhow, bail, ensure, Error};
+use object::elf::*;
 use object::endian::{BigEndian, Endian, Endianness, LittleEndian};
-use object::{RelocationEncoding, RelocationKind};
-use std::collections::HashMap;
+use object::read::elf::{FileHeader, SectionHeader};
+use object::{
+    File, NativeEndian as NE, Object, ObjectSection, ObjectSymbol, RelocationEncoding,
+    RelocationKind, RelocationTarget, U64Bytes,
+};
+use std::mem::size_of;

 pub fn create_gdbjit_image(
    mut bytes: Vec<u8>,
    code_region: (*const u8, usize),
-    defined_funcs_offset: usize,
-    funcs: &[*const u8],
 ) -> Result<Vec<u8>, Error> {
    let e = ensure_supported_elf_format(&bytes)?;

    // patch relocs
-    relocate_dwarf_sections(&bytes, defined_funcs_offset, funcs)?;
+    relocate_dwarf_sections(&mut bytes, code_region)?;

    // elf is still missing details...
    match e {
@@ -30,27 +33,14 @@ pub fn create_gdbjit_image(
    Ok(bytes)
 }

-fn relocate_dwarf_sections(
-    bytes: &[u8],
-    defined_funcs_offset: usize,
-    funcs: &[*const u8],
-) -> Result<(), Error> {
-    use object::read::{File, Object, ObjectSection, ObjectSymbol, RelocationTarget};
-
-    let obj = File::parse(bytes)?;
-    let mut func_symbols = HashMap::new();
-    for sym in obj.symbols() {
-        match (sym.name(), sym.section_index()) {
-            (Ok(name), Some(_section_index)) if name.starts_with("_wasm_function_") => {
-                let index = name["_wasm_function_".len()..].parse::<usize>()?;
-                let data = funcs[index - defined_funcs_offset];
-                func_symbols.insert(sym.index(), data);
-            }
-            _ => (),
-        }
-    }
-
+fn relocate_dwarf_sections(bytes: &mut [u8], code_region: (*const u8, usize)) -> Result<(), Error> {
+    let mut relocations = Vec::new();
+    let obj = File::parse(&bytes[..])?;
    for section in obj.sections() {
+        let section_start = match section.file_range() {
+            Some((start, _)) => start,
+            None => continue,
+        };
        for (off, r) in section.relocations() {
            if r.kind() != RelocationKind::Absolute
                || r.encoding() != RelocationEncoding::Generic
@@ -59,24 +49,26 @@ fn relocate_dwarf_sections(
                continue;
            }

-            let data = match r.target() {
-                RelocationTarget::Symbol(ref index) => func_symbols.get(index),
-                _ => None,
+            let sym = match r.target() {
+                RelocationTarget::Symbol(index) => match obj.symbol_by_index(index) {
+                    Ok(sym) => sym,
+                    Err(_) => continue,
+                },
+                _ => continue,
            };
-            let data: *const u8 = match data {
-                Some(data) => *data,
-                None => {
-                    continue;
+            relocations.push((
+                section_start + off,
+                (code_region.0 as u64)
+                    .wrapping_add(sym.address())
+                    .wrapping_add(r.addend() as u64),
+            ));
+        }
    }
-            };

-            let target = (data as u64).wrapping_add(r.addend() as u64);
-
-            let entry_ptr = section.data_range(off, 8).unwrap().unwrap().as_ptr();
-            unsafe {
-                std::ptr::write(entry_ptr as *mut u64, target);
-            }
-        }
+    for (offset, value) in relocations {
+        let (loc, _) = object::from_bytes_mut::<U64Bytes<NE>>(&mut bytes[offset as usize..])
+            .map_err(|()| anyhow!("invalid dwarf relocations"))?;
+        loc.set(NE, value);
    }
    Ok(())
 }
@@ -84,7 +76,6 @@ fn relocate_dwarf_sections(
 fn ensure_supported_elf_format(bytes: &[u8]) -> Result<Endianness, Error> {
    use object::elf::*;
    use object::read::elf::*;
-    use std::mem::size_of;

    let kind = match object::FileKind::parse(bytes) {
        Ok(file) => file,
@@ -130,68 +121,33 @@ fn convert_object_elf_to_loadable_file<E: Endian>(
    bytes: &mut Vec<u8>,
    code_region: (*const u8, usize),
 ) {
-    use object::elf::*;
-    use std::ffi::CStr;
-    use std::mem::size_of;
-    use std::os::raw::c_char;
-
    let e = E::default();
-    let header: &FileHeader64<E> = unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) };

-    let e_shentsize = header.e_shentsize.get(e);
-    let e_shoff = header.e_shoff.get(e);
-    let e_shnum = header.e_shnum.get(e);
-    let mut shstrtab_off = 0;
-    for i in 0..e_shnum {
-        let off = e_shoff as isize + i as isize * e_shentsize as isize;
-        let section: &SectionHeader64<E> =
-            unsafe { &*(bytes.as_ptr().offset(off) as *const SectionHeader64<_>) };
-        if section.sh_type.get(e) != SHT_STRTAB {
-            continue;
-        }
-        shstrtab_off = section.sh_offset.get(e);
-    }
-    let mut segment: Option<_> = None;
-    for i in 0..e_shnum {
-        let off = e_shoff as isize + i as isize * e_shentsize as isize;
+    let header = FileHeader64::<E>::parse(&bytes[..]).unwrap();
+    let sections = header.sections(e, &bytes[..]).unwrap();
+    let text_range = match sections.section_by_name(e, b".text") {
+        Some((i, text)) => {
+            let range = text.file_range(e);
+            let off = header.e_shoff.get(e) as usize + i * header.e_shentsize.get(e) as usize;
+
            let section: &mut SectionHeader64<E> =
-            unsafe { &mut *(bytes.as_mut_ptr().offset(off) as *mut SectionHeader64<_>) };
-        if section.sh_type.get(e) != SHT_PROGBITS {
-            continue;
-        }
-        // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function
-        let sh_name_off = section.sh_name.get(e);
-        let sh_name = unsafe {
-            CStr::from_ptr(
-                bytes
-                    .as_ptr()
-                    .offset((shstrtab_off + sh_name_off as u64) as isize)
-                    as *const c_char,
-            )
-            .to_str()
-            .expect("name")
-        };
-        if sh_name != ".text" {
-            continue;
-        }
-
-        assert!(segment.is_none());
+                object::from_bytes_mut(&mut bytes[off..]).unwrap().0;
            // Patch vaddr, and save file location and its size.
            section.sh_addr.set(e, code_region.0 as u64);
-        let sh_offset = section.sh_offset.get(e);
-        let sh_size = section.sh_size.get(e);
-        segment = Some((sh_offset, sh_size));
+            range
        }
+        None => None,
+    };

    // LLDB wants segment with virtual address set, placing them at the end of ELF.
    let ph_off = bytes.len();
    let e_phentsize = size_of::<ProgramHeader64<E>>();
    let e_phnum = 1;
    bytes.resize(ph_off + e_phentsize * e_phnum, 0);
-    if let Some((sh_offset, sh_size)) = segment {
+    if let Some((sh_offset, sh_size)) = text_range {
        let (v_offset, size) = code_region;
        let program: &mut ProgramHeader64<E> =
-            unsafe { &mut *(bytes.as_ptr().add(ph_off) as *mut ProgramHeader64<_>) };
+            object::from_bytes_mut(&mut bytes[ph_off..]).unwrap().0;
        program.p_type.set(e, PT_LOAD);
        program.p_offset.set(e, sh_offset);
        program.p_vaddr.set(e, v_offset as u64);
@@ -203,8 +159,7 @@ fn convert_object_elf_to_loadable_file<E: Endian>(
    }

    // It is somewhat loadable ELF file at this moment.
-    let header: &mut FileHeader64<E> =
-        unsafe { &mut *(bytes.as_mut_ptr() as *mut FileHeader64<_>) };
+    let header: &mut FileHeader64<E> = object::from_bytes_mut(bytes).unwrap().0;
    header.e_type.set(e, ET_DYN);
    header.e_phoff.set(e, ph_off as u64);
    header.e_phentsize.set(e, e_phentsize as u16);
--- a/crates/jit/src/instantiate.rs
+++ b/crates/jit/src/instantiate.rs
@@ -213,12 +213,8 @@ impl CompiledModule {

        // Register GDB JIT images; initialize profiler and load the wasm module.
        let dbg_jit_registration = if artifacts.native_debug_info_present {
-            let bytes = create_dbg_image(
-                artifacts.obj.to_vec(),
-                code_range,
-                &artifacts.module,
-                &finished_functions,
-            )?;
+            let bytes = create_gdbjit_image(artifacts.obj.to_vec(), code_range)
+                .map_err(SetupError::DebugInfo)?;
            profiler.module_load(&artifacts.module, &finished_functions, Some(&bytes));
            let reg = GdbJitImageRegistration::register(bytes);
            Some(reg)
@@ -407,20 +403,6 @@ impl<'a> SymbolizeContext<'a> {
    }
 }

-fn create_dbg_image(
-    obj: Vec<u8>,
-    code_range: (*const u8, usize),
-    module: &Module,
-    finished_functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
-) -> Result<Vec<u8>, SetupError> {
-    let funcs = finished_functions
-        .values()
-        .map(|allocated: &*mut [VMFunctionBody]| (*allocated) as *const u8)
-        .collect::<Vec<_>>();
-    create_gdbjit_image(obj, code_range, module.num_imported_funcs, &funcs)
-        .map_err(SetupError::DebugInfo)
-}
-
 fn build_code_memory(
    obj: &File,
    module: &Module,