Skip to content

Commit

Permalink
Add basic support for DWARF processing with components
Browse files Browse the repository at this point in the history
This commit updates the native-DWARF processing (the `-D debug-info` CLI
flag) to support components. Previously component support was not
implemented and if there was more than one core wasm module within a
component then dwarf would be ignored entirely.

This commit contains a number of refactorings to plumb a fuller
compilation context throughout the DWARF processing pipeline. Previously
the data structures used were only able to support a single module. A
new `Compilation` structure is used to represent the results of an
entire compilation and is plumbed through the various locations. Most of
the refactorings in this commit were then to extend loops to loop over
more things and handle the case where there is more than one core wasm
module.

I'll admit I'm not an expert on DWARF, but basic examples appear to work
locally and most of the additions here seemed relatively straightforward
in terms of "add another loop to iterate over more things" but I'm not
100% sure how well this will work. In theory this now supports
concatenating DWARF sections across multiple core wasm modules, but
that's not super well tested.
  • Loading branch information
alexcrichton committed Jun 5, 2024
1 parent fcf1054 commit 4da0b2c
Show file tree
Hide file tree
Showing 18 changed files with 523 additions and 373 deletions.
81 changes: 25 additions & 56 deletions crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::debug::{DwarfSectionRelocTarget, ModuleMemoryOffset};
use crate::debug::DwarfSectionRelocTarget;
use crate::func_environ::FuncEnvironment;
use crate::DEBUG_ASSERT_TRAP_CODE;
use crate::{array_call_signature, CompiledFunction, ModuleTextBuilder};
Expand All @@ -12,11 +12,9 @@ use cranelift_codegen::isa::{
use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::Context;
use cranelift_codegen::{CompiledCode, MachStackMap};
use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_entity::PrimaryMap;
use cranelift_frontend::FunctionBuilder;
use cranelift_wasm::{
DefinedFuncIndex, FuncTranslator, MemoryIndex, OwnedMemoryIndex, WasmFuncType, WasmValType,
};
use cranelift_wasm::{DefinedFuncIndex, FuncTranslator, WasmFuncType, WasmValType};
use object::write::{Object, StandardSegment, SymbolId};
use object::{RelocationEncoding, RelocationFlags, RelocationKind, SectionKind};
use std::any::Any;
Expand All @@ -29,7 +27,8 @@ use wasmparser::{FuncValidatorAllocations, FunctionBody};
use wasmtime_environ::{
AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, FlagValue, FunctionBodyData,
FunctionLoc, ModuleTranslation, ModuleTypesBuilder, PtrSize, RelocationTarget,
StackMapInformation, TrapEncodingBuilder, Tunables, VMOffsets, WasmFunctionInfo,
StackMapInformation, StaticModuleIndex, TrapEncodingBuilder, Tunables, VMOffsets,
WasmFunctionInfo,
};

#[cfg(feature = "component-model")]
Expand Down Expand Up @@ -440,63 +439,33 @@ impl wasmtime_environ::Compiler for Compiler {
self
}

fn append_dwarf(
fn append_dwarf<'a>(
&self,
obj: &mut Object<'_>,
translation: &ModuleTranslation<'_>,
funcs: &PrimaryMap<DefinedFuncIndex, (SymbolId, &(dyn Any + Send))>,
dwarf_package_bytes: Option<&[u8]>,
tunables: &Tunables,
translations: &'a PrimaryMap<StaticModuleIndex, ModuleTranslation<'a>>,
get_func: &'a dyn Fn(
StaticModuleIndex,
DefinedFuncIndex,
) -> (SymbolId, &'a (dyn Any + Send)),
dwarf_package_bytes: Option<&'a [u8]>,
tunables: &'a Tunables,
) -> Result<()> {
let ofs = VMOffsets::new(
self.isa
.triple()
.architecture
.pointer_width()
.unwrap()
.bytes(),
&translation.module,
);

let memory_offset = if ofs.num_imported_memories > 0 {
ModuleMemoryOffset::Imported {
offset_to_vm_memory_definition: ofs.vmctx_vmmemory_import(MemoryIndex::new(0))
+ u32::from(ofs.vmmemory_import_from()),
offset_to_memory_base: ofs.ptr.vmmemory_definition_base().into(),
}
} else if ofs.num_defined_memories > 0 {
// The addition of shared memory makes the following assumption,
// "owned memory index = 0", possibly false. If the first memory
// is a shared memory, the base pointer will not be stored in
// the `owned_memories` array. The following code should
// eventually be fixed to not only handle shared memories but
// also multiple memories.
assert_eq!(
ofs.num_defined_memories, ofs.num_owned_memories,
"the memory base pointer may be incorrect due to sharing memory"
);
ModuleMemoryOffset::Defined(
ofs.vmctx_vmmemory_definition_base(OwnedMemoryIndex::new(0)),
let get_func = move |m, f| {
let (sym, any) = get_func(m, f);
(
sym,
any.downcast_ref::<CompiledFunction>().unwrap().metadata(),
)
} else {
ModuleMemoryOffset::None
};
let functions_info = funcs
.iter()
.map(|(_, (_, func))| {
let f = func.downcast_ref::<CompiledFunction>().unwrap();
f.metadata()
})
.collect();
let dwarf_sections = crate::debug::emit_dwarf(
let mut compilation = crate::debug::Compilation::new(
&*self.isa,
&translation.debuginfo,
&functions_info,
&memory_offset,
translations,
&get_func,
dwarf_package_bytes,
tunables,
)
.with_context(|| "failed to emit DWARF debug information")?;
);
let dwarf_sections = crate::debug::emit_dwarf(&*self.isa, &mut compilation)
.with_context(|| "failed to emit DWARF debug information")?;

let (debug_bodies, debug_relocs): (Vec<_>, Vec<_>) = dwarf_sections
.iter()
Expand All @@ -515,7 +484,7 @@ impl wasmtime_environ::Compiler for Compiler {
let section_id = *dwarf_sections_ids.get(name).unwrap();
for reloc in relocs {
let target_symbol = match reloc.target {
DwarfSectionRelocTarget::Func(index) => funcs[DefinedFuncIndex::new(index)].0,
DwarfSectionRelocTarget::Func(id) => compilation.symbol_id(id),
DwarfSectionRelocTarget::Section(name) => {
obj.section_symbol(dwarf_sections_ids[name])
}
Expand Down
157 changes: 157 additions & 0 deletions crates/cranelift/src/debug.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
//! Debug utils for WebAssembly using Cranelift.

use crate::CompiledFunctionMetadata;
use cranelift_codegen::isa::TargetIsa;
use object::write::SymbolId;
use std::collections::HashMap;
use wasmtime_environ::{
DefinedFuncIndex, EntityRef, MemoryIndex, ModuleTranslation, OwnedMemoryIndex, PrimaryMap,
PtrSize, StaticModuleIndex, Tunables, VMOffsets,
};

/// Memory definition offset in the VMContext structure.
#[derive(Debug, Clone)]
pub enum ModuleMemoryOffset {
Expand All @@ -18,6 +27,154 @@ pub enum ModuleMemoryOffset {
},
}

/// Reader type shared by the DWARF-processing code: a little-endian
/// `gimli::EndianSlice` that borrows its bytes for `'input`.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;

/// "Package structure" to collect together various artifacts/results of a
/// compilation.
///
/// This structure is threaded through a number of top-level functions of DWARF
/// processing within this submodule to pass along all the bits-and-pieces of
/// the compilation context.
pub struct Compilation<'a> {
/// All module translations which were present in this compilation.
///
/// This map has one entry for core wasm modules and may have multiple (or
/// zero) for components.
translations: &'a PrimaryMap<StaticModuleIndex, ModuleTranslation<'a>>,

/// Accessor of a particular compiled function for a module.
///
/// This returns the `object`-based symbol for the function as well as the
/// function's `&CompiledFunctionMetadata`.
get_func:
&'a dyn Fn(StaticModuleIndex, DefinedFuncIndex) -> (SymbolId, &'a CompiledFunctionMetadata),

/// Optionally-specified `*.dwp` file, currently only supported for core
/// wasm modules.
dwarf_package_bytes: Option<&'a [u8]>,

/// Compilation settings used when producing functions.
tunables: &'a Tunables,

/// Translation from the `usize`-based symbol which gimli uses to the
/// corresponding `SymbolId`; the `usize` is the position in this vector.
symbol_index_to_id: Vec<SymbolId>,
/// Reverse translation from a `SymbolId` to its `usize` index in
/// `symbol_index_to_id`, together with the module and defined-function
/// index the symbol was registered for.
symbol_id_to_index: HashMap<SymbolId, (usize, StaticModuleIndex, DefinedFuncIndex)>,

/// The `ModuleMemoryOffset` for each module within `translations`.
///
/// Note that this doesn't support multi-memory at this time.
module_memory_offsets: PrimaryMap<StaticModuleIndex, ModuleMemoryOffset>,
}

impl<'a> Compilation<'a> {
pub fn new(
isa: &dyn TargetIsa,
translations: &'a PrimaryMap<StaticModuleIndex, ModuleTranslation<'a>>,
get_func: &'a dyn Fn(
StaticModuleIndex,
DefinedFuncIndex,
) -> (SymbolId, &'a CompiledFunctionMetadata),
dwarf_package_bytes: Option<&'a [u8]>,
tunables: &'a Tunables,
) -> Compilation<'a> {
// Build the `module_memory_offsets` map based on the modules in
// `translations`.
let mut module_memory_offsets = PrimaryMap::new();
for (i, translation) in translations {
let ofs = VMOffsets::new(
isa.triple().architecture.pointer_width().unwrap().bytes(),
&translation.module,
);

let memory_offset = if ofs.num_imported_memories > 0 {
ModuleMemoryOffset::Imported {
offset_to_vm_memory_definition: ofs.vmctx_vmmemory_import(MemoryIndex::new(0))
+ u32::from(ofs.vmmemory_import_from()),
offset_to_memory_base: ofs.ptr.vmmemory_definition_base().into(),
}
} else if ofs.num_defined_memories > 0 {
// The addition of shared memory makes the following assumption,
// "owned memory index = 0", possibly false. If the first memory
// is a shared memory, the base pointer will not be stored in
// the `owned_memories` array. The following code should
// eventually be fixed to not only handle shared memories but
// also multiple memories.
assert_eq!(
ofs.num_defined_memories, ofs.num_owned_memories,
"the memory base pointer may be incorrect due to sharing memory"
);
ModuleMemoryOffset::Defined(
ofs.vmctx_vmmemory_definition_base(OwnedMemoryIndex::new(0)),
)
} else {
ModuleMemoryOffset::None
};
let j = module_memory_offsets.push(memory_offset);
assert_eq!(i, j);
}

// Build the `symbol <=> usize` mappings
let mut symbol_index_to_id = Vec::new();
let mut symbol_id_to_index = HashMap::new();

for (module, translation) in translations {
for func in translation.module.defined_func_indices() {
let (sym, _func) = get_func(module, func);
symbol_id_to_index.insert(sym, (symbol_index_to_id.len(), module, func));
symbol_index_to_id.push(sym);
}
}

Compilation {
translations,
get_func,
dwarf_package_bytes,
tunables,
symbol_index_to_id,
symbol_id_to_index,
module_memory_offsets,
}
}

/// Returns an iterator over all function indexes present in this
/// compilation.
///
/// Each function is additionally accompanied with its module index.
fn indexes(&self) -> impl Iterator<Item = (StaticModuleIndex, DefinedFuncIndex)> + '_ {
self.translations
.iter()
.flat_map(|(i, t)| t.module.defined_func_indices().map(move |j| (i, j)))
}

/// Returns an iterator of all functions with their module, symbol, and
/// function metadata that were produced during compilation.
fn functions(
&self,
) -> impl Iterator<Item = (StaticModuleIndex, usize, &'a CompiledFunctionMetadata)> + '_ {
self.indexes().map(move |(module, func)| {
let (sym, func) = self.function(module, func);
(module, sym, func)
})
}

/// Returns the symbol and metadata associated with a specific function.
fn function(
&self,
module: StaticModuleIndex,
func: DefinedFuncIndex,
) -> (usize, &'a CompiledFunctionMetadata) {
let (sym, func) = (self.get_func)(module, func);
(self.symbol_id_to_index[&sym].0, func)
}

/// Maps a `usize`-based symbol used by gimli to the object-based
/// `SymbolId`.
pub fn symbol_id(&self, sym: usize) -> SymbolId {
self.symbol_index_to_id[sym]
}
}

pub use write_debuginfo::{emit_dwarf, DwarfSectionRelocTarget};

mod gc;
Expand Down
55 changes: 30 additions & 25 deletions crates/cranelift/src/debug/gc.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use crate::debug::transform::AddressTransform;
use crate::debug::{Compilation, Reader};
use gimli::constants;
use gimli::read;
use gimli::{Reader, UnitSectionOffset};
use gimli::UnitSectionOffset;
use std::collections::{HashMap, HashSet};
use wasmtime_environ::{PrimaryMap, StaticModuleIndex};

#[derive(Debug)]
pub struct Dependencies {
Expand Down Expand Up @@ -65,23 +67,26 @@ impl Dependencies {
}
}

pub fn build_dependencies<R: Reader<Offset = usize>>(
dwarf: &read::Dwarf<R>,
dwp: &Option<read::DwarfPackage<R>>,
at: &AddressTransform,
pub fn build_dependencies(
compilation: &mut Compilation<'_>,
dwp: &Option<read::DwarfPackage<Reader<'_>>>,
at: &PrimaryMap<StaticModuleIndex, AddressTransform>,
) -> read::Result<Dependencies> {
let mut deps = Dependencies::new();
let mut units = dwarf.units();
while let Some(unit) = units.next()? {
build_unit_dependencies(unit, dwarf, dwp, at, &mut deps)?;
for (i, translation) in compilation.translations.iter() {
let dwarf = &translation.debuginfo.dwarf;
let mut units = dwarf.units();
while let Some(unit) = units.next()? {
build_unit_dependencies(unit, dwarf, dwp, &at[i], &mut deps)?;
}
}
Ok(deps)
}

fn build_unit_dependencies<R: Reader<Offset = usize>>(
header: read::UnitHeader<R>,
dwarf: &read::Dwarf<R>,
dwp: &Option<read::DwarfPackage<R>>,
fn build_unit_dependencies(
header: read::UnitHeader<Reader<'_>>,
dwarf: &read::Dwarf<Reader<'_>>,
dwp: &Option<read::DwarfPackage<Reader<'_>>>,
at: &AddressTransform,
deps: &mut Dependencies,
) -> read::Result<()> {
Expand All @@ -103,7 +108,7 @@ fn build_unit_dependencies<R: Reader<Offset = usize>>(
Ok(())
}

fn has_die_back_edge<R: Reader<Offset = usize>>(die: &read::DebuggingInformationEntry<R>) -> bool {
fn has_die_back_edge(die: &read::DebuggingInformationEntry<Reader<'_>>) -> bool {
match die.tag() {
constants::DW_TAG_variable
| constants::DW_TAG_constant
Expand All @@ -123,10 +128,10 @@ fn has_die_back_edge<R: Reader<Offset = usize>>(die: &read::DebuggingInformation
}
}

fn has_valid_code_range<R: Reader<Offset = usize>>(
die: &read::DebuggingInformationEntry<R>,
dwarf: &read::Dwarf<R>,
unit: &read::Unit<R>,
fn has_valid_code_range(
die: &read::DebuggingInformationEntry<Reader<'_>>,
dwarf: &read::Dwarf<Reader<'_>>,
unit: &read::Unit<Reader<'_>>,
at: &AddressTransform,
) -> read::Result<bool> {
match die.tag() {
Expand Down Expand Up @@ -199,10 +204,10 @@ fn has_valid_code_range<R: Reader<Offset = usize>>(
Ok(false)
}

fn build_die_dependencies<R: Reader<Offset = usize>>(
die: read::EntriesTreeNode<R>,
dwarf: &read::Dwarf<R>,
unit: &read::Unit<R>,
fn build_die_dependencies(
die: read::EntriesTreeNode<Reader<'_>>,
dwarf: &read::Dwarf<Reader<'_>>,
unit: &read::Unit<Reader<'_>>,
at: &AddressTransform,
deps: &mut Dependencies,
) -> read::Result<()> {
Expand All @@ -229,11 +234,11 @@ fn build_die_dependencies<R: Reader<Offset = usize>>(
Ok(())
}

fn build_attr_dependencies<R: Reader<Offset = usize>>(
attr: &read::Attribute<R>,
fn build_attr_dependencies(
attr: &read::Attribute<Reader<'_>>,
offset: UnitSectionOffset,
_dwarf: &read::Dwarf<R>,
unit: &read::Unit<R>,
_dwarf: &read::Dwarf<Reader<'_>>,
unit: &read::Unit<Reader<'_>>,
_at: &AddressTransform,
deps: &mut Dependencies,
) -> read::Result<()> {
Expand Down
Loading

0 comments on commit 4da0b2c

Please sign in to comment.