Skip to content

Commit

Permalink
Add initial DWARF symbol importer
Browse files Browse the repository at this point in the history
  • Loading branch information
chaoticgd committed Nov 5, 2024
1 parent 9af8baa commit 54c7083
Show file tree
Hide file tree
Showing 16 changed files with 461 additions and 92 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ add_library(ccc STATIC
src/ccc/data_refinement.h
src/ccc/dependency.cpp
src/ccc/dependency.h
src/ccc/dwarf_importer.cpp
src/ccc/dwarf_importer.h
src/ccc/dwarf_section.cpp
src/ccc/dwarf_section.h
src/ccc/elf.cpp
Expand Down
7 changes: 6 additions & 1 deletion docs/ProjectStructure.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
# Project Structure

- src/demangle.cpp: Main file for demangle.
- src/fuzztest.cpp: Fuzzing harness for libfuzzer.
- src/objdump.cpp: Main file for objdump.
- src/stdump.cpp: Main file for stdump.
- src/tests.cpp: Test runner.
- src/uncc.cpp: Main file for uncc.
- src/ccc/ast.cpp: Defines a C++ AST structure for types.
- src/ccc/ast_json.cpp: Reads/writes the AST structure as JSON.
- src/ccc/data_refinement.cpp: Converts global variable data into structured initializer lists and literals.
- src/ccc/dependency.cpp: Tries to infer information about which types belong to which files.
- src/ccc/dwarf_importer.cpp: Imports .debug (DWARF) symbol tables into the symbol database.
- src/ccc/dwarf_section.cpp: Parses the .debug (DWARF) binary format.
- src/ccc/elf.cpp: Parses ELF files.
- src/ccc/elf_symtab.cpp: Parses the ELF symbol table.
- src/ccc/importer_flags.cpp: An enum and help information printing for importer configuration flags.
- src/ccc/int128.cpp: 128-bit integer types.
- src/ccc/mdebug_analysis.cpp: Accepts a stream of symbols and imports the data.
- src/ccc/mdebug_importer.cpp: Imports .mdebug (STABS) symbol tables into the symbol database.
- src/ccc/mdebug_section.cpp: Parses the .mdebug binary format.
- src/ccc/mdebug_symbols.cpp: Parses symbols from the .mdebug section.
- src/ccc/print_cpp.cpp: Prints out AST nodes as C++ code.
Expand All @@ -30,3 +34,4 @@
- src/mips/opcodes.h: Enums for different types of EE core MIPS opcodes.
- src/mips/tables.cpp: Table of EE core MIPS instructions.
- src/platform/file.cpp: Utility functions for reading files.
- tests/: Unit tests.
2 changes: 2 additions & 0 deletions src/ccc/ccc.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "ast_json.h"
#include "data_refinement.h"
#include "dependency.h"
#include "dwarf_importer.h"
#include "dwarf_section.h"
#include "elf.h"
#include "elf_symtab.h"
#include "importer_flags.h"
Expand Down
265 changes: 265 additions & 0 deletions src/ccc/dwarf_importer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT

#include "dwarf_importer.h"

namespace ccc::dwarf {

// Walks first_die and its siblings, and for every overlay DIE records which
// compile unit offsets it references through its member attributes. The result
// maps a compile unit DIE offset to the ID of the overlay that lists it.
static Result<std::map<u32, u32>> parse_overlays(SymbolDatabase& database, const DIE& first_die);
// Chooses the name to store for a symbol: the mangled name if that attribute
// is present, otherwise the plain name, otherwise an empty string.
static std::string get_name(const Value& name, const Value& mangled_name);

// Construct an importer. This only records its arguments; no parsing happens
// until import_symbol_table or import_overlay is called. The database, section
// reader and demangler are held by reference, so they must outlive the
// importer. The interrupt flag is held by pointer and may be null.
SymbolTableImporter::SymbolTableImporter(
	SymbolDatabase& database,
	const SectionReader& dwarf,
	u32 importer_flags,
	const DemanglerFunctions& demangler,
	const std::atomic_bool* interrupt)
	: m_database(database), m_dwarf(dwarf)
	, m_importer_flags(importer_flags), m_demangler(demangler)
	, m_interrupt(interrupt)
{
}

// Import every compile unit that is not claimed by an overlay. Passing an
// empty overlay ID to import_compile_units selects exactly those units.
Result<void> SymbolTableImporter::import_symbol_table(SymbolGroup group)
{
	const std::optional<u32> no_overlay = std::nullopt;
	return import_compile_units(no_overlay, group);
}

// Import only the compile units that belong to the overlay with the given ID.
Result<void> SymbolTableImporter::import_overlay(u32 overlay_id, SymbolGroup group)
{
	const std::optional<u32> selected_overlay(overlay_id);
	return import_compile_units(selected_overlay, group);
}

// Walk the top-level DIEs and import the compile units selected by overlay_id.
//
// If overlay_id is empty, only compile units that no overlay lists as a member
// are imported. Otherwise only compile units listed by the overlay with that ID
// are imported. The interrupt flag, if one was provided, is checked before each
// top-level DIE is examined.
Result<void> SymbolTableImporter::import_compile_units(std::optional<u32> overlay_id, SymbolGroup group)
{
	Result<DIE> first_die = m_dwarf.first_die(m_importer_flags);
	CCC_RETURN_IF_ERROR(first_die);
	
	Result<std::map<u32, u32>> overlay_lookup = parse_overlays(m_database, *first_die);
	CCC_RETURN_IF_ERROR(overlay_lookup);
	
	// Reset the per-import state.
	m_group = group;
	m_source_file = nullptr;
	
	for (std::optional<DIE> current = *first_die; current.has_value();) {
		CCC_CHECK(!m_interrupt || !*m_interrupt, "Operation interrupted by user.");
		
		if (current->tag() == TAG_compile_unit) {
			auto entry = overlay_lookup->find(current->offset());
			
			bool wanted;
			if (entry == overlay_lookup->end()) {
				// Not part of any overlay: only wanted for the main import.
				wanted = !overlay_id.has_value();
			} else {
				// Part of an overlay: only wanted if it's the requested one.
				wanted = overlay_id.has_value() && entry->second == *overlay_id;
			}
			
			if (wanted) {
				Result<void> import_result = import_compile_unit(*current);
				CCC_RETURN_IF_ERROR(import_result);
			}
		}
		
		Result<std::optional<DIE>> following = current->sibling();
		CCC_RETURN_IF_ERROR(following);
		current = *following;
	}
	
	return Result<void>();
}

// Import a single compile_unit DIE.
//
// Finds or creates the source file symbol the unit belongs to, grows that
// symbol's address range so it covers this unit's low_pc/high_pc pair, and then
// imports every subroutine found among the unit's direct children.
//
// Returns an error if the attributes cannot be read, if the source file symbol
// cannot be created, or if reading or importing a child DIE fails.
Result<void> SymbolTableImporter::import_compile_unit(const DIE& compile_unit)
{
	static const AttributesSpec compile_unit_attributes = DIE::specify_attributes({
		DIE::required_attribute(AT_name, {FORM_STRING}),
		DIE::optional_attribute(AT_producer, {FORM_STRING}),
		DIE::optional_attribute(AT_language, {FORM_DATA4}),
		DIE::optional_attribute(AT_stmt_list, {FORM_DATA4}),
		DIE::optional_attribute(AT_low_pc, {FORM_ADDR}),
		DIE::optional_attribute(AT_high_pc, {FORM_ADDR})
	});
	
	Value name;
	Value producer;
	Value language;
	Value stmt_list;
	Value low_pc;
	Value high_pc;
	Result<void> attribute_result = compile_unit.attributes(
		compile_unit_attributes, {&name, &producer, &language, &stmt_list, &low_pc, &high_pc});
	CCC_RETURN_IF_ERROR(attribute_result);
	
	// The Metrowerks compiler outputs multiple compile_unit DIEs for a single
	// logical source file, so we need to deduplicate them here.
	if (!m_source_file || m_source_file->name() != name.string()) {
		Result<SourceFile*> new_source_file = m_database.source_files.create_symbol(
			std::string(name.string()), Address(), m_group.source, m_group.module_symbol);
		CCC_RETURN_IF_ERROR(new_source_file);
		m_source_file = *new_source_file;
	}
	
	// Each individual compile_unit DIE seems to either correspond to a
	// collection of types or a single function, so we make the source file's
	// address and size cover all the low_pc/high_pc pairs.
	if (low_pc.valid() && high_pc.valid()) {
		// First range seen for this source file: adopt it as-is.
		if (!m_source_file->address().valid()) {
			m_database.source_files.move_symbol(m_source_file->handle(), low_pc.address());
			m_source_file->set_size(high_pc.address() - low_pc.address());
		}
		
		// The new range starts before the current one: move the start down and
		// grow the size by the same amount so the end address stays put.
		if (m_source_file->address().value > low_pc.address()) {
			u32 new_size = m_source_file->size() + m_source_file->address().value - low_pc.address();
			m_database.source_files.move_symbol(m_source_file->handle(), low_pc.address());
			m_source_file->set_size(new_size);
		}
		
		// The new range ends after the current one: extend the size. This has
		// to compare the high_pc address itself, not whether it is present.
		if (high_pc.address() > m_source_file->address().value + m_source_file->size()) {
			m_source_file->set_size(high_pc.address() - m_source_file->address().value);
		}
	}
	
	Result<std::optional<DIE>> first_child = compile_unit.first_child();
	CCC_RETURN_IF_ERROR(first_child);
	
	std::optional<DIE> child = *first_child;
	while (child.has_value()) {
		switch (child->tag()) {
			case TAG_global_subroutine:
			case TAG_subroutine: {
				Result<void> subroutine_result = import_subroutine(*child);
				CCC_RETURN_IF_ERROR(subroutine_result);
				break;
			}
			// All other kinds of child DIE are currently ignored.
			default: {}
		}
		
		Result<std::optional<DIE>> next_child = child->sibling();
		CCC_RETURN_IF_ERROR(next_child);
		child = *next_child;
	}
	
	return Result<void>();
}

// Import a subroutine or global_subroutine DIE as a function symbol.
//
// All four attributes are optional. The function is named using get_name, is
// placed at low_pc if that attribute is present (otherwise its address is left
// default-constructed), and is given a size only if both low_pc and high_pc are
// present.
Result<void> SymbolTableImporter::import_subroutine(const DIE& subroutine)
{
	static const AttributesSpec subroutine_attributes = DIE::specify_attributes({
		DIE::optional_attribute(AT_name, {FORM_STRING}),
		DIE::optional_attribute(AT_mangled_name, {FORM_STRING}),
		DIE::optional_attribute(AT_low_pc, {FORM_ADDR}),
		DIE::optional_attribute(AT_high_pc, {FORM_ADDR})
	});
	
	Value name;
	Value mangled_name;
	Value low_pc;
	Value high_pc;
	Result<void> read_result = subroutine.attributes(
		subroutine_attributes, {&name, &mangled_name, &low_pc, &high_pc});
	CCC_RETURN_IF_ERROR(read_result);
	
	Address start;
	if (low_pc.valid()) {
		start = low_pc.address();
	}
	
	Result<Function*> created = m_database.functions.create_symbol(
		get_name(name, mangled_name),
		m_group.source,
		m_group.module_symbol,
		start,
		m_importer_flags,
		m_demangler);
	CCC_RETURN_IF_ERROR(created);
	
	// Without both ends of the range there is no size to record.
	if (!low_pc.valid() || !high_pc.valid()) {
		return Result<void>();
	}
	
	(*created)->set_size(high_pc.address() - low_pc.address());
	
	return Result<void>();
}

// List the overlays declared by the top-level DIEs of the section.
//
// Each overlay DIE must carry both an overlay ID and an overlay name. The
// entries are returned in the order the DIEs are encountered.
Result<std::vector<OverlayInfo>> enumerate_overlays(const SectionReader& dwarf, u32 importer_flags)
{
	Result<DIE> first_die = dwarf.first_die(importer_flags);
	CCC_RETURN_IF_ERROR(first_die);
	
	std::vector<OverlayInfo> found;
	
	for (std::optional<DIE> current = *first_die; current.has_value();) {
		if (current->tag() == TAG_overlay) {
			static const AttributesSpec overlay_attributes = DIE::specify_attributes({
				DIE::required_attribute(AT_overlay_id, {FORM_DATA4}),
				DIE::required_attribute(AT_overlay_name, {FORM_STRING})
			});
			
			Value id_value;
			Value name_value;
			Result<void> read_result = current->attributes(overlay_attributes, {&id_value, &name_value});
			CCC_RETURN_IF_ERROR(read_result);
			
			found.emplace_back();
			found.back().id = static_cast<u32>(id_value.constant());
			found.back().name = name_value.string();
		}
		
		Result<std::optional<DIE>> following = current->sibling();
		CCC_RETURN_IF_ERROR(following);
		current = *following;
	}
	
	return found;
}

// Build a lookup from compile unit DIE offset to overlay ID.
//
// Scans first_die and all its siblings. For each overlay DIE, every member
// attribute holding a reference contributes one entry mapping the referenced
// offset to that overlay's ID. If the same offset is referenced more than once,
// the first entry inserted is kept. The database parameter is currently unused.
static Result<std::map<u32, u32>> parse_overlays(SymbolDatabase& database, const DIE& first_die)
{
	std::map<u32, u32> lookup;
	
	for (std::optional<DIE> current = first_die; current.has_value();) {
		if (current->tag() == TAG_overlay) {
			static const AttributesSpec overlay_attributes = DIE::specify_attributes({
				DIE::required_attribute(AT_overlay_id, {FORM_DATA4}),
				DIE::required_attribute(AT_overlay_name, {FORM_STRING})
			});
			
			Value id_value;
			Value name_value;
			Result<void> read_result = current->attributes(overlay_attributes, {&id_value, &name_value});
			CCC_RETURN_IF_ERROR(read_result);
			
			// We need to iterate over all the attributes here rather than use
			// my fancy API because, despite what page 3 of the spec says, there
			// are multiple attributes of the same type.
			Result<std::vector<AttributeTuple>> every_attribute = current->all_attributes();
			CCC_RETURN_IF_ERROR(every_attribute);
			
			for (const auto& [attribute_offset, attribute_type, attribute_value] : *every_attribute) {
				const bool is_member_reference =
					attribute_type == AT_member && attribute_value.form() == FORM_REF;
				if (is_member_reference) {
					lookup.emplace(
						attribute_value.reference(), static_cast<u32>(id_value.constant()));
				}
			}
		}
		
		Result<std::optional<DIE>> following = current->sibling();
		CCC_RETURN_IF_ERROR(following);
		current = *following;
	}
	
	return lookup;
}

static std::string get_name(const Value& name, const Value& mangled_name)
{
if (mangled_name.valid()) {
return std::string(mangled_name.string());
} else if (name.valid()) {
return std::string(name.string());
}

return std::string();
}

}
52 changes: 52 additions & 0 deletions src/ccc/dwarf_importer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT

#pragma once

#include "dwarf_section.h"
#include "symbol_database.h"

namespace ccc::dwarf {

// Imports symbols from a DWARF section into a symbol database. The top-level
// DIEs are walked, and each selected compile unit becomes (or extends) a source
// file symbol, with its subroutines imported as function symbols.
class SymbolTableImporter {
public:
// Stores the arguments for later use; no parsing is done here. The database,
// section reader and demangler are held by reference and so must outlive the
// importer. The interrupt pointer may be null.
SymbolTableImporter(
SymbolDatabase& database,
const SectionReader& dwarf,
u32 importer_flags,
const DemanglerFunctions& demangler,
const std::atomic_bool* interrupt);

// Import a DWARF symbol table into the symbol database, excluding
// compilation units associated with an overlay.
Result<void> import_symbol_table(SymbolGroup group);

// Import a DWARF symbol table into the symbol database, but only including
// compilation units associated with the specified overlay.
Result<void> import_overlay(u32 overlay_id, SymbolGroup group);

protected:
// Shared implementation of the two public entry points. An empty overlay_id
// selects the compile units that are not a member of any overlay.
Result<void> import_compile_units(std::optional<u32> overlay_id, SymbolGroup group);
// Import one compile_unit DIE along with the subroutines it contains.
Result<void> import_compile_unit(const DIE& compile_unit);
// Import one subroutine or global_subroutine DIE as a function symbol.
Result<void> import_subroutine(const DIE& subroutine);

// Destination for all the symbols that get created.
SymbolDatabase& m_database;
// The section the DIEs are read from.
const SectionReader& m_dwarf;
// Passed to the section reader and to function symbol creation.
u32 m_importer_flags;
// Passed through when creating function symbols.
const DemanglerFunctions& m_demangler;
// If non-null and set, the import is aborted with an error.
const std::atomic_bool* m_interrupt;

// The group passed to the import call currently in progress.
SymbolGroup m_group;
// The source file symbol used for the most recently imported compile unit,
// kept so consecutive compile units with the same name can share one symbol.
SourceFile* m_source_file = nullptr;
};

// Describes a single overlay found in the symbol table.
struct OverlayInfo {
// The value of the overlay DIE's overlay ID attribute.
u32 id;
// The value of the overlay DIE's overlay name attribute.
std::string name;
};

// Enumerate all the overlays defined in the symbol table. The ID values
// provided can then be fed into the import_overlay function above. Returns an
// error if the DIEs cannot be read or if an overlay DIE is missing its ID or
// name attribute.
Result<std::vector<OverlayInfo>> enumerate_overlays(const SectionReader& dwarf, u32 importer_flags);

}
Loading

0 comments on commit 54c7083

Please sign in to comment.