debug: Support big-endian architectures

This fixes some hard-coded assumptions in the debug crate that the native ELF files being accessed are little-endian, specifically in create_gdbjit_image and in emit_dwarf. In addition, data in WebAssembly memory always uses little-endian byte order. Therefore, if the native architecture is big-endian, all base types used to refer to WebAssembly memory need to be marked as little-endian using the DW_AT_endianity attribute, so that the debugger will be able to access them correctly.
2021-04-21 14:14:59 +02:00
parent 1243cea455
commit 801358333d
4 changed files with 79 additions and 39 deletions
--- a/crates/debug/src/lib.rs
+++ b/crates/debug/src/lib.rs
@@ -3,6 +3,7 @@
 #![allow(clippy::cast_ptr_alignment)]
 use anyhow::{bail, ensure, Error};
 use object::endian::{BigEndian, Endian, Endianness, LittleEndian};
 use object::{RelocationEncoding, RelocationKind};
 use std::collections::HashMap;
@@ -18,13 +19,20 @@ pub fn create_gdbjit_image(
    defined_funcs_offset: usize,
    funcs: &[*const u8],
 ) -> Result<Vec<u8>, Error> {
-    ensure_supported_elf_format(&mut bytes)?;
+    let e = ensure_supported_elf_format(&mut bytes)?;
    // patch relocs
    relocate_dwarf_sections(&mut bytes, defined_funcs_offset, funcs)?;
    // elf is still missing details...
-    convert_object_elf_to_loadable_file(&mut bytes, code_region);
+    match e {
        Endianness::Little => {
            convert_object_elf_to_loadable_file::<LittleEndian>(&mut bytes, code_region)
        }
        Endianness::Big => {
            convert_object_elf_to_loadable_file::<BigEndian>(&mut bytes, code_region)
        }
    }
    // let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file");
    // ::std::io::Write::write_all(&mut file, &bytes).expect("write");
@@ -83,18 +91,33 @@ fn relocate_dwarf_sections(
    Ok(())
 }
-fn ensure_supported_elf_format(bytes: &mut Vec<u8>) -> Result<(), Error> {
+fn ensure_supported_elf_format(bytes: &mut Vec<u8>) -> Result<Endianness, Error> {
    use object::elf::*;
-    use object::endian::LittleEndian;
+    use object::read::elf::*;
    use object::Bytes;
    use std::mem::size_of;
-    let e = LittleEndian;
+    let kind = match object::FileKind::parse(bytes) {
-    let header: &FileHeader64<LittleEndian> =
+        Ok(file) => file,
-        unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) };
+        Err(err) => {
-    ensure!(
+            bail!("Failed to parse file: {}", err);
-        header.e_ident.class == ELFCLASS64 && header.e_ident.data == ELFDATA2LSB,
+        }
-        "bits and endianess in .ELF",
+    };
-    );
+    let header = match kind {
        object::FileKind::Elf64 => {
            match object::elf::FileHeader64::<Endianness>::parse(Bytes(bytes)) {
                Ok(header) => header,
                Err(err) => {
                    bail!("Unsupported ELF file: {}", err);
                }
            }
        }
        _ => {
            bail!("only 64-bit ELF files currently supported")
        }
    };
    let e = header.endian().unwrap();
    match header.e_machine.get(e) {
        EM_X86_64 => (),
        machine => {
@@ -106,23 +129,25 @@ fn ensure_supported_elf_format(bytes: &mut Vec<u8>) -> Result<(), Error> {
        "program header table is empty"
    );
    let e_shentsize = header.e_shentsize.get(e);
-    ensure!(
+    let req_shentsize = match e {
-        e_shentsize as usize == size_of::<SectionHeader64<LittleEndian>>(),
+        Endianness::Little => size_of::<SectionHeader64<LittleEndian>>(),
-        "size of sh"
+        Endianness::Big => size_of::<SectionHeader64<BigEndian>>(),
-    );
+    };
-    Ok(())
+    ensure!(e_shentsize as usize == req_shentsize, "size of sh");
    Ok(e)
 }
-fn convert_object_elf_to_loadable_file(bytes: &mut Vec<u8>, code_region: (*const u8, usize)) {
+fn convert_object_elf_to_loadable_file<E: Endian>(
    bytes: &mut Vec<u8>,
    code_region: (*const u8, usize),
 ) {
    use object::elf::*;
    use object::endian::LittleEndian;
    use std::ffi::CStr;
    use std::mem::size_of;
    use std::os::raw::c_char;
-    let e = LittleEndian;
+    let e = E::default();
-    let header: &FileHeader64<LittleEndian> =
+    let header: &FileHeader64<E> = unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) };
        unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) };
    let e_shentsize = header.e_shentsize.get(e);
    let e_shoff = header.e_shoff.get(e);
@@ -130,7 +155,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec<u8>, code_region: (*const
    let mut shstrtab_off = 0;
    for i in 0..e_shnum {
        let off = e_shoff as isize + i as isize * e_shentsize as isize;
-        let section: &SectionHeader64<LittleEndian> =
+        let section: &SectionHeader64<E> =
            unsafe { &*(bytes.as_ptr().offset(off) as *const SectionHeader64<_>) };
        if section.sh_type.get(e) != SHT_STRTAB {
            continue;
@@ -140,7 +165,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec<u8>, code_region: (*const
    let mut segment: Option<_> = None;
    for i in 0..e_shnum {
        let off = e_shoff as isize + i as isize * e_shentsize as isize;
-        let section: &mut SectionHeader64<LittleEndian> =
+        let section: &mut SectionHeader64<E> =
            unsafe { &mut *(bytes.as_mut_ptr().offset(off) as *mut SectionHeader64<_>) };
        if section.sh_type.get(e) != SHT_PROGBITS {
            continue;
@@ -171,12 +196,12 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec<u8>, code_region: (*const
    // LLDB wants segment with virtual address set, placing them at the end of ELF.
    let ph_off = bytes.len();
-    let e_phentsize = size_of::<ProgramHeader64<LittleEndian>>();
+    let e_phentsize = size_of::<ProgramHeader64<E>>();
    let e_phnum = 1;
    bytes.resize(ph_off + e_phentsize * e_phnum, 0);
    if let Some((sh_offset, sh_size)) = segment {
        let (v_offset, size) = code_region;
-        let program: &mut ProgramHeader64<LittleEndian> =
+        let program: &mut ProgramHeader64<E> =
            unsafe { &mut *(bytes.as_ptr().add(ph_off) as *mut ProgramHeader64<_>) };
        program.p_type.set(e, PT_LOAD);
        program.p_offset.set(e, sh_offset);
@@ -189,7 +214,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec<u8>, code_region: (*const
    }
    // It is somewhat loadable ELF file at this moment.
-    let header: &mut FileHeader64<LittleEndian> =
+    let header: &mut FileHeader64<E> =
        unsafe { &mut *(bytes.as_mut_ptr() as *mut FileHeader64<_>) };
    header.e_type.set(e, ET_DYN);
    header.e_phoff.set(e, ph_off as u64);
--- a/crates/debug/src/transform/unit.rs
+++ b/crates/debug/src/transform/unit.rs
@@ -10,6 +10,7 @@ use anyhow::{Context, Error};
 use gimli::write;
 use gimli::{AttributeValue, DebuggingInformationEntry, Unit};
 use std::collections::HashSet;
 use wasmtime_environ::ir::Endianness;
 use wasmtime_environ::isa::TargetIsa;
 use wasmtime_environ::wasm::DefinedFuncIndex;
 use wasmtime_environ::{CompiledFunctions, ModuleMemoryOffset};
@@ -463,6 +464,19 @@ where
            isa,
        )?;
        // Data in WebAssembly memory always uses little-endian byte order.
        // If the native architecture is big-endian, we need to mark all
        // base types used to refer to WebAssembly memory as little-endian
        // using the DW_AT_endianity attribute, so that the debugger will
        // be able to correctly access them.
        if entry.tag() == gimli::DW_TAG_base_type && isa.endianness() == Endianness::Big {
            let current_scope = comp_unit.get_mut(die_id);
            current_scope.set(
                gimli::DW_AT_endianity,
                write::AttributeValue::Endianity(gimli::DW_END_little),
            );
        }
        if entry.tag() == gimli::DW_TAG_subprogram && !current_scope_ranges.is_empty() {
            append_vmctx_info(
                comp_unit,
--- a/crates/debug/src/write_debuginfo.rs
+++ b/crates/debug/src/write_debuginfo.rs
@@ -2,6 +2,7 @@ pub use crate::transform::transform_dwarf;
 use gimli::write::{Address, Dwarf, EndianVec, FrameTable, Result, Sections, Writer};
 use gimli::{RunTimeEndian, SectionId};
 use wasmtime_environ::entity::EntityRef;
 use wasmtime_environ::ir::Endianness;
 use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa};
 use wasmtime_environ::{CompiledFunctions, DebugInfoData, ModuleMemoryOffset};
@@ -26,10 +27,19 @@ pub struct DwarfSection {
 }
 fn emit_dwarf_sections(
    isa: &dyn TargetIsa,
    mut dwarf: Dwarf,
    frames: Option<FrameTable>,
 ) -> anyhow::Result<Vec<DwarfSection>> {
-    let mut sections = Sections::new(WriterRelocate::default());
+    let endian = match isa.endianness() {
        Endianness::Little => RunTimeEndian::Little,
        Endianness::Big => RunTimeEndian::Big,
    };
    let writer = WriterRelocate {
        relocs: Vec::new(),
        writer: EndianVec::new(endian),
    };
    let mut sections = Sections::new(writer);
    dwarf.write(&mut sections)?;
    if let Some(frames) = frames {
        frames.write_debug_frame(&mut sections.debug_frame)?;
@@ -54,15 +64,6 @@ pub struct WriterRelocate {
    writer: EndianVec<RunTimeEndian>,
 }
 impl Default for WriterRelocate {
    fn default() -> Self {
        WriterRelocate {
            relocs: Vec::new(),
            writer: EndianVec::new(RunTimeEndian::Little),
        }
    }
 }
 impl Writer for WriterRelocate {
    type Endian = RunTimeEndian;
@@ -156,6 +157,6 @@ pub fn emit_dwarf<'a>(
 ) -> anyhow::Result<Vec<DwarfSection>> {
    let dwarf = transform_dwarf(isa, debuginfo_data, funcs, memory_offset)?;
    let frame_table = create_frame_table(isa, funcs);
-    let sections = emit_dwarf_sections(dwarf, frame_table)?;
+    let sections = emit_dwarf_sections(isa, dwarf, frame_table)?;
    Ok(sections)
 }
--- a/crates/environ/src/data_structures.rs
+++ b/crates/environ/src/data_structures.rs
@@ -3,8 +3,8 @@
 pub mod ir {
    pub use cranelift_codegen::binemit::{Reloc, StackMap};
    pub use cranelift_codegen::ir::{
-        types, AbiParam, ArgumentPurpose, JumpTableOffsets, LabelValueLoc, LibCall, Signature,
+        types, AbiParam, ArgumentPurpose, Endianness, JumpTableOffsets, LabelValueLoc, LibCall,
-        SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc,
+        Signature, SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc,
    };
    pub use cranelift_codegen::{ValueLabelsRanges, ValueLocRange};
 }