From 801358333dabfb9348aafede7efb4e6dd56eba2f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 21 Apr 2021 14:14:59 +0200 Subject: [PATCH] debug: Support big-endian architectures This fixes some hard-coded assumptions in the debug crate that the native ELF files being accessed are little-endian; specifically in create_gdbjit_image as well as in emit_dwarf. In addition, data in WebAssembly memory always uses little-endian byte order. Therefore, if the native architecture is big-endian, all references to base types need to be marked as little-endian using the DW_AT_endianity attribute, so that the debugger will be able to correctly access them. --- crates/debug/src/lib.rs | 77 ++++++++++++++++++--------- crates/debug/src/transform/unit.rs | 14 +++++ crates/debug/src/write_debuginfo.rs | 23 ++++---- crates/environ/src/data_structures.rs | 4 +- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/crates/debug/src/lib.rs b/crates/debug/src/lib.rs index 597ab2e25c..b438051b45 100644 --- a/crates/debug/src/lib.rs +++ b/crates/debug/src/lib.rs @@ -3,6 +3,7 @@ #![allow(clippy::cast_ptr_alignment)] use anyhow::{bail, ensure, Error}; +use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; use object::{RelocationEncoding, RelocationKind}; use std::collections::HashMap; @@ -18,13 +19,20 @@ pub fn create_gdbjit_image( defined_funcs_offset: usize, funcs: &[*const u8], ) -> Result, Error> { - ensure_supported_elf_format(&mut bytes)?; + let e = ensure_supported_elf_format(&mut bytes)?; // patch relocs relocate_dwarf_sections(&mut bytes, defined_funcs_offset, funcs)?; // elf is still missing details... - convert_object_elf_to_loadable_file(&mut bytes, code_region); + match e { + Endianness::Little => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + Endianness::Big => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + } // let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file"); // ::std::io::Write::write_all(&mut file, &bytes).expect("write"); @@ -83,18 +91,33 @@ fn relocate_dwarf_sections( Ok(()) } -fn ensure_supported_elf_format(bytes: &mut Vec) -> Result<(), Error> { +fn ensure_supported_elf_format(bytes: &mut Vec) -> Result { use object::elf::*; - use object::endian::LittleEndian; + use object::read::elf::*; + use object::Bytes; use std::mem::size_of; - let e = LittleEndian; - let header: &FileHeader64 = - unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; - ensure!( - header.e_ident.class == ELFCLASS64 && header.e_ident.data == ELFDATA2LSB, - "bits and endianess in .ELF", - ); + let kind = match object::FileKind::parse(bytes) { + Ok(file) => file, + Err(err) => { + bail!("Failed to parse file: {}", err); + } + }; + let header = match kind { + object::FileKind::Elf64 => { + match object::elf::FileHeader64::::parse(Bytes(bytes)) { + Ok(header) => header, + Err(err) => { + bail!("Unsupported ELF file: {}", err); + } + } + } + _ => { + bail!("only 64-bit ELF files currently supported") + } + }; + let e = header.endian().unwrap(); + match header.e_machine.get(e) { EM_X86_64 => (), machine => { @@ -106,23 +129,25 @@ fn ensure_supported_elf_format(bytes: &mut Vec) -> Result<(), Error> { "program header table is empty" ); let e_shentsize = header.e_shentsize.get(e); - ensure!( - e_shentsize as usize == size_of::>(), - "size of sh" - ); - Ok(()) + let req_shentsize = match e { + Endianness::Little => size_of::>(), + Endianness::Big => size_of::>(), + }; + ensure!(e_shentsize as usize == req_shentsize, "size of sh"); + Ok(e) } -fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const u8, usize)) { +fn convert_object_elf_to_loadable_file( + bytes: &mut Vec, + code_region: (*const u8, usize), +) { use object::elf::*; - use object::endian::LittleEndian; use std::ffi::CStr; use std::mem::size_of; use std::os::raw::c_char; - let e = LittleEndian; - let header: &FileHeader64 = - unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; + let e = E::default(); + let header: &FileHeader64 = unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; let e_shentsize = header.e_shentsize.get(e); let e_shoff = header.e_shoff.get(e); @@ -130,7 +155,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const let mut shstrtab_off = 0; for i in 0..e_shnum { let off = e_shoff as isize + i as isize * e_shentsize as isize; - let section: &SectionHeader64 = + let section: &SectionHeader64 = unsafe { &*(bytes.as_ptr().offset(off) as *const SectionHeader64<_>) }; if section.sh_type.get(e) != SHT_STRTAB { continue; @@ -140,7 +165,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const let mut segment: Option<_> = None; for i in 0..e_shnum { let off = e_shoff as isize + i as isize * e_shentsize as isize; - let section: &mut SectionHeader64 = + let section: &mut SectionHeader64 = unsafe { &mut *(bytes.as_mut_ptr().offset(off) as *mut SectionHeader64<_>) }; if section.sh_type.get(e) != SHT_PROGBITS { continue; @@ -171,12 +196,12 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const // LLDB wants segment with virtual address set, placing them at the end of ELF. let ph_off = bytes.len(); - let e_phentsize = size_of::>(); + let e_phentsize = size_of::>(); let e_phnum = 1; bytes.resize(ph_off + e_phentsize * e_phnum, 0); if let Some((sh_offset, sh_size)) = segment { let (v_offset, size) = code_region; - let program: &mut ProgramHeader64 = + let program: &mut ProgramHeader64 = unsafe { &mut *(bytes.as_ptr().add(ph_off) as *mut ProgramHeader64<_>) }; program.p_type.set(e, PT_LOAD); program.p_offset.set(e, sh_offset); @@ -189,7 +214,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const } // It is somewhat loadable ELF file at this moment. - let header: &mut FileHeader64 = + let header: &mut FileHeader64 = unsafe { &mut *(bytes.as_mut_ptr() as *mut FileHeader64<_>) }; header.e_type.set(e, ET_DYN); header.e_phoff.set(e, ph_off as u64); diff --git a/crates/debug/src/transform/unit.rs b/crates/debug/src/transform/unit.rs index 15f165418a..655d7738ee 100644 --- a/crates/debug/src/transform/unit.rs +++ b/crates/debug/src/transform/unit.rs @@ -10,6 +10,7 @@ use anyhow::{Context, Error}; use gimli::write; use gimli::{AttributeValue, DebuggingInformationEntry, Unit}; use std::collections::HashSet; +use wasmtime_environ::ir::Endianness; use wasmtime_environ::isa::TargetIsa; use wasmtime_environ::wasm::DefinedFuncIndex; use wasmtime_environ::{CompiledFunctions, ModuleMemoryOffset}; @@ -463,6 +464,19 @@ where isa, )?; + // Data in WebAssembly memory always uses little-endian byte order. + // If the native architecture is big-endian, we need to mark all + // base types used to refer to WebAssembly memory as little-endian + // using the DW_AT_endianity attribute, so that the debugger will + // be able to correctly access them. + if entry.tag() == gimli::DW_TAG_base_type && isa.endianness() == Endianness::Big { + let current_scope = comp_unit.get_mut(die_id); + current_scope.set( + gimli::DW_AT_endianity, + write::AttributeValue::Endianity(gimli::DW_END_little), + ); + } + if entry.tag() == gimli::DW_TAG_subprogram && !current_scope_ranges.is_empty() { append_vmctx_info( comp_unit, diff --git a/crates/debug/src/write_debuginfo.rs b/crates/debug/src/write_debuginfo.rs index 56c7231db0..491267b495 100644 --- a/crates/debug/src/write_debuginfo.rs +++ b/crates/debug/src/write_debuginfo.rs @@ -2,6 +2,7 @@ pub use crate::transform::transform_dwarf; use gimli::write::{Address, Dwarf, EndianVec, FrameTable, Result, Sections, Writer}; use gimli::{RunTimeEndian, SectionId}; use wasmtime_environ::entity::EntityRef; +use wasmtime_environ::ir::Endianness; use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa}; use wasmtime_environ::{CompiledFunctions, DebugInfoData, ModuleMemoryOffset}; @@ -26,10 +27,19 @@ pub struct DwarfSection { } fn emit_dwarf_sections( + isa: &dyn TargetIsa, mut dwarf: Dwarf, frames: Option, ) -> anyhow::Result> { - let mut sections = Sections::new(WriterRelocate::default()); + let endian = match isa.endianness() { + Endianness::Little => RunTimeEndian::Little, + Endianness::Big => RunTimeEndian::Big, + }; + let writer = WriterRelocate { + relocs: Vec::new(), + writer: EndianVec::new(endian), + }; + let mut sections = Sections::new(writer); dwarf.write(&mut sections)?; if let Some(frames) = frames { frames.write_debug_frame(&mut sections.debug_frame)?; @@ -54,15 +64,6 @@ pub struct WriterRelocate { writer: EndianVec, } -impl Default for WriterRelocate { - fn default() -> Self { - WriterRelocate { - relocs: Vec::new(), - writer: EndianVec::new(RunTimeEndian::Little), - } - } -} - impl Writer for WriterRelocate { type Endian = RunTimeEndian; @@ -156,6 +157,6 @@ pub fn emit_dwarf<'a>( ) -> anyhow::Result> { let dwarf = transform_dwarf(isa, debuginfo_data, funcs, memory_offset)?; let frame_table = create_frame_table(isa, funcs); - let sections = emit_dwarf_sections(dwarf, frame_table)?; + let sections = emit_dwarf_sections(isa, dwarf, frame_table)?; Ok(sections) } diff --git a/crates/environ/src/data_structures.rs b/crates/environ/src/data_structures.rs index 36eec310ec..12b321d779 100644 --- a/crates/environ/src/data_structures.rs +++ b/crates/environ/src/data_structures.rs @@ -3,8 +3,8 @@ pub mod ir { pub use cranelift_codegen::binemit::{Reloc, StackMap}; pub use cranelift_codegen::ir::{ - types, AbiParam, ArgumentPurpose, JumpTableOffsets, LabelValueLoc, LibCall, Signature, - SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc, + types, AbiParam, ArgumentPurpose, Endianness, JumpTableOffsets, LabelValueLoc, LibCall, + Signature, SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc, }; pub use cranelift_codegen::{ValueLabelsRanges, ValueLocRange}; }