From 54c70830440d617a9badd71290e84569aac82b89 Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Tue, 5 Nov 2024 19:43:13 +0000 Subject: [PATCH] Add initial DWARF symbol importer --- CMakeLists.txt | 2 + docs/ProjectStructure.md | 7 +- src/ccc/ccc.h | 2 + src/ccc/dwarf_importer.cpp | 265 ++++++++++++++++++++++++++++++++++++ src/ccc/dwarf_importer.h | 52 +++++++ src/ccc/dwarf_section.cpp | 68 ++++----- src/ccc/dwarf_section.h | 73 +++++++--- src/ccc/elf.cpp | 4 +- src/ccc/elf_symtab.cpp | 21 +-- src/ccc/mdebug_importer.cpp | 4 +- src/ccc/mdebug_section.cpp | 12 +- src/ccc/sndll.cpp | 13 +- src/ccc/symbol_table.cpp | 5 +- src/ccc/util.cpp | 9 +- src/ccc/util.h | 2 +- test/ccc/util_tests.cpp | 14 +- 16 files changed, 461 insertions(+), 92 deletions(-) create mode 100644 src/ccc/dwarf_importer.cpp create mode 100644 src/ccc/dwarf_importer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 11e9f2c..de61776 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,8 @@ add_library(ccc STATIC src/ccc/data_refinement.h src/ccc/dependency.cpp src/ccc/dependency.h + src/ccc/dwarf_importer.cpp + src/ccc/dwarf_importer.h src/ccc/dwarf_section.cpp src/ccc/dwarf_section.h src/ccc/elf.cpp diff --git a/docs/ProjectStructure.md b/docs/ProjectStructure.md index c6b3f1d..e0ed145 100644 --- a/docs/ProjectStructure.md +++ b/docs/ProjectStructure.md @@ -1,19 +1,23 @@ # Project Structure - src/demangle.cpp: Main file for demangle. +- src/fuzztest.cpp: Fuzzing harness for libfuzzer. - src/objdump.cpp: Main file for objdump. - src/stdump.cpp: Main file for stdump. +- src/tests.cpp: Test runner. - src/uncc.cpp: Main file for uncc. - src/ccc/ast.cpp: Defines a C++ AST structure for types. - src/ccc/ast_json.cpp: Reads/writes the AST structure as JSON. - src/ccc/data_refinement.cpp: Converts global variable data into structured initializer lists and literals. 
- src/ccc/dependency.cpp: Tries to infer information about which types belong to which files. +- src/ccc/dwarf_importer.cpp: Imports .debug (DWARF) symbol tables into the symbol database. +- src/ccc/dwarf_section.cpp: Parses the .debug (DWARF) binary format. - src/ccc/elf.cpp: Parses ELF files. - src/ccc/elf_symtab.cpp: Parses the ELF symbol table. - src/ccc/importer_flags.cpp: An enum and help information printing for importer configuration flags. - src/ccc/int128.cpp: 128-bit integer types. - src/ccc/mdebug_analysis.cpp: Accepts a stream of symbols and imports the data. -- src/ccc/mdebug_importer.cpp: Top-level file for parsing .mdebug symbol tables. +- src/ccc/mdebug_importer.cpp: Imports .mdebug (STABS) symbol tables into the symbol database. - src/ccc/mdebug_section.cpp: Parses the .mdebug binary format. - src/ccc/mdebug_symbols.cpp: Parses symbols from the .mdebug section. - src/ccc/print_cpp.cpp: Prints out AST nodes as C++ code. @@ -30,3 +34,4 @@ - src/mips/opcodes.h: Enums for different types of EE core MIPS opcodes. - src/mips/tables.cpp: Table of EE core MIPS instructions. - src/platform/file.cpp: Utility functions for reading files. +- test/: Unit tests. diff --git a/src/ccc/ccc.h b/src/ccc/ccc.h index 4121934..980dec2 100644 --- a/src/ccc/ccc.h +++ b/src/ccc/ccc.h @@ -7,6 +7,8 @@ #include "ast_json.h" #include "data_refinement.h" #include "dependency.h" +#include "dwarf_importer.h" +#include "dwarf_section.h" #include "elf.h" #include "elf_symtab.h" #include "importer_flags.h" diff --git a/src/ccc/dwarf_importer.cpp b/src/ccc/dwarf_importer.cpp new file mode 100644 index 0000000..356deef --- /dev/null +++ b/src/ccc/dwarf_importer.cpp @@ -0,0 +1,265 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "dwarf_importer.h" + +namespace ccc::dwarf { + +static Result> parse_overlays(SymbolDatabase& database, const DIE& first_die); +static std::string get_name(const Value& name, const Value& mangled_name); + +SymbolTableImporter::SymbolTableImporter( + SymbolDatabase& database, + const SectionReader& dwarf, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt) + : m_database(database) + , m_dwarf(dwarf) + , m_importer_flags(importer_flags) + , m_demangler(demangler) + , m_interrupt(interrupt) {} + +Result SymbolTableImporter::import_symbol_table(SymbolGroup group) +{ + return import_compile_units(std::nullopt, group); +} + +Result SymbolTableImporter::import_overlay(u32 overlay_id, SymbolGroup group) +{ + return import_compile_units(overlay_id, group); +} + +Result SymbolTableImporter::import_compile_units(std::optional overlay_id, SymbolGroup group) +{ + Result first_die = m_dwarf.first_die(m_importer_flags); + CCC_RETURN_IF_ERROR(first_die); + + Result> compile_unit_offset_to_overlay_id = parse_overlays(m_database, *first_die); + CCC_RETURN_IF_ERROR(compile_unit_offset_to_overlay_id); + + m_group = group; + m_source_file = nullptr; + + std::optional die = *first_die; + while (die.has_value()) { + CCC_CHECK(!m_interrupt || !*m_interrupt, "Operation interrupted by user."); + + bool process_compile_unit = false; + if (die->tag() == TAG_compile_unit) { + auto overlay_iterator = compile_unit_offset_to_overlay_id->find(die->offset()); + if (overlay_iterator != compile_unit_offset_to_overlay_id->end()) { + process_compile_unit = overlay_id.has_value() && overlay_iterator->second == *overlay_id; + } else { + process_compile_unit = !overlay_id.has_value(); + } + } + + if (process_compile_unit) { + Result compile_unit_result = import_compile_unit(*die); + CCC_RETURN_IF_ERROR(compile_unit_result); + } + + Result> next_die = die->sibling(); + CCC_RETURN_IF_ERROR(next_die); + die = 
*next_die; + } + + return Result(); +} + +Result SymbolTableImporter::import_compile_unit(const DIE& compile_unit) +{ + static const AttributesSpec compile_unit_attributes = DIE::specify_attributes({ + DIE::required_attribute(AT_name, {FORM_STRING}), + DIE::optional_attribute(AT_producer, {FORM_STRING}), + DIE::optional_attribute(AT_language, {FORM_DATA4}), + DIE::optional_attribute(AT_stmt_list, {FORM_DATA4}), + DIE::optional_attribute(AT_low_pc, {FORM_ADDR}), + DIE::optional_attribute(AT_high_pc, {FORM_ADDR}) + }); + + Value name; + Value producer; + Value language; + Value stmt_list; + Value low_pc; + Value high_pc; + Result attribute_result = compile_unit.attributes( + compile_unit_attributes, {&name, &producer, &language, &stmt_list, &low_pc, &high_pc}); + CCC_RETURN_IF_ERROR(attribute_result); + + Address address; + if (low_pc.valid()) { + address = low_pc.address(); + } + + // The Metrowerks compiler outputs multiple compile_unit DIEs for a single + // logical source file, so we need to deduplicate them here. + if (!m_source_file || m_source_file->name() != name.string()) { + Result new_source_file = m_database.source_files.create_symbol( + std::string(name.string()), Address(), m_group.source, m_group.module_symbol); + CCC_RETURN_IF_ERROR(new_source_file); + m_source_file = *new_source_file; + } + + // Each individual compile_unit DIE seems to either correspond to a + // collection of types or a single function, so we make the source file's + // address and size cover all the low_pc/high_pc pairs. 
+ if (low_pc.valid() && high_pc.valid()) { + if (!m_source_file->address().valid()) { + m_database.source_files.move_symbol(m_source_file->handle(), low_pc.address()); + m_source_file->set_size(high_pc.address() - low_pc.address()); + } + + if (m_source_file->address().value > low_pc.address()) { + u32 new_size = m_source_file->size() + m_source_file->address().value - low_pc.address(); + m_database.source_files.move_symbol(m_source_file->handle(), low_pc.address()); + m_source_file->set_size(new_size); + } + + if (high_pc.address() > m_source_file->address().value + m_source_file->size()) { + m_source_file->set_size(high_pc.address() - m_source_file->address().value); + } + } + + Result> first_child = compile_unit.first_child(); + CCC_RETURN_IF_ERROR(first_child); + + std::optional child = *first_child; + while (child.has_value()) { + switch (child->tag()) { + case TAG_global_subroutine: + case TAG_subroutine: { + Result subroutine_result = import_subroutine(*child); + CCC_RETURN_IF_ERROR(subroutine_result); + break; + } + default: {} + } + + Result> next_child = child->sibling(); + CCC_RETURN_IF_ERROR(next_child); + child = *next_child; + } + + return Result(); +} + +Result SymbolTableImporter::import_subroutine(const DIE& subroutine) +{ + static const AttributesSpec subroutine_attributes = DIE::specify_attributes({ + DIE::optional_attribute(AT_name, {FORM_STRING}), + DIE::optional_attribute(AT_mangled_name, {FORM_STRING}), + DIE::optional_attribute(AT_low_pc, {FORM_ADDR}), + DIE::optional_attribute(AT_high_pc, {FORM_ADDR}) + }); + + Value name; + Value mangled_name; + Value low_pc; + Value high_pc; + Result attribute_result = subroutine.attributes( + subroutine_attributes, {&name, &mangled_name, &low_pc, &high_pc}); + CCC_RETURN_IF_ERROR(attribute_result); + + Address address; + if (low_pc.valid()) { + address = low_pc.address(); + } + + Result function = m_database.functions.create_symbol( + get_name(name, mangled_name), m_group.source, m_group.module_symbol, 
address, m_importer_flags, m_demangler); + CCC_RETURN_IF_ERROR(function); + + if (low_pc.valid() && high_pc.valid()) { + (*function)->set_size(high_pc.address() - low_pc.address()); + } + + return Result(); +} + +Result> enumerate_overlays(const SectionReader& dwarf, u32 importer_flags) +{ + Result first_die = dwarf.first_die(importer_flags); + CCC_RETURN_IF_ERROR(first_die); + + std::vector overlays; + + std::optional die = *first_die; + while (die.has_value()) { + if (die->tag() == TAG_overlay) { + static const AttributesSpec overlay_attributes = DIE::specify_attributes({ + DIE::required_attribute(AT_overlay_id, {FORM_DATA4}), + DIE::required_attribute(AT_overlay_name, {FORM_STRING}) + }); + + Value overlay_id; + Value overlay_name; + Result attribute_result = die->attributes(overlay_attributes, {&overlay_id, &overlay_name}); + CCC_RETURN_IF_ERROR(attribute_result); + + OverlayInfo& info = overlays.emplace_back(); + info.id = static_cast(overlay_id.constant()); + info.name = overlay_name.string(); + } + + Result> next_die = die->sibling(); + CCC_RETURN_IF_ERROR(next_die); + die = *next_die; + } + + return overlays; +} + +static Result> parse_overlays(SymbolDatabase& database, const DIE& first_die) +{ + std::map compile_unit_offset_to_overlay_id; + + std::optional die = first_die; + while (die.has_value()) { + if (die->tag() == TAG_overlay) { + static const AttributesSpec overlay_attributes = DIE::specify_attributes({ + DIE::required_attribute(AT_overlay_id, {FORM_DATA4}), + DIE::required_attribute(AT_overlay_name, {FORM_STRING}) + }); + + Value overlay_id; + Value overlay_name; + Result attribute_result = die->attributes(overlay_attributes, {&overlay_id, &overlay_name}); + CCC_RETURN_IF_ERROR(attribute_result); + + // We need to iterate over all the attributes here rather than use + // my fancy API because, despite what page 3 of the spec says, there + // are multiple attributes of the same type. 
+ Result> attributes = die->all_attributes(); + CCC_RETURN_IF_ERROR(attributes); + + for (const auto& [offset, attribute, value] : *attributes) { + if (attribute == AT_member && value.form() == FORM_REF) { + compile_unit_offset_to_overlay_id.emplace( + value.reference(), static_cast(overlay_id.constant())); + } + } + } + + Result> next_die = die->sibling(); + CCC_RETURN_IF_ERROR(next_die); + die = *next_die; + } + + return compile_unit_offset_to_overlay_id; +} + +static std::string get_name(const Value& name, const Value& mangled_name) +{ + if (mangled_name.valid()) { + return std::string(mangled_name.string()); + } else if (name.valid()) { + return std::string(name.string()); + } + + return std::string(); +} + +} diff --git a/src/ccc/dwarf_importer.h b/src/ccc/dwarf_importer.h new file mode 100644 index 0000000..dbcb28a --- /dev/null +++ b/src/ccc/dwarf_importer.h @@ -0,0 +1,52 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "dwarf_section.h" +#include "symbol_database.h" + +namespace ccc::dwarf { + +class SymbolTableImporter { +public: + SymbolTableImporter( + SymbolDatabase& database, + const SectionReader& dwarf, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt); + + // Import a DWARF symbol table into the symbol database, excluding + // compilation units associated with an overlay. + Result import_symbol_table(SymbolGroup group); + + // Import a DWARF symbol table into the symbol database, but only including + // compilation units associated with the specified overlay. 
+ Result import_overlay(u32 overlay_id, SymbolGroup group); + +protected: + Result import_compile_units(std::optional overlay_id, SymbolGroup group); + Result import_compile_unit(const DIE& compile_unit); + Result import_subroutine(const DIE& subroutine); + + SymbolDatabase& m_database; + const SectionReader& m_dwarf; + u32 m_importer_flags; + const DemanglerFunctions& m_demangler; + const std::atomic_bool* m_interrupt; + + SymbolGroup m_group; + SourceFile* m_source_file = nullptr; +}; + +struct OverlayInfo { + u32 id; + std::string name; +}; + +// Enumerate all the overlays defined in the symbol table. The ID values +// provided can then be fed into the import_overlay function above. +Result> enumerate_overlays(const SectionReader& dwarf, u32 importer_flags); + +} diff --git a/src/ccc/dwarf_section.cpp b/src/ccc/dwarf_section.cpp index 5bd7898..1821359 100644 --- a/src/ccc/dwarf_section.cpp +++ b/src/ccc/dwarf_section.cpp @@ -90,11 +90,12 @@ Value Value::from_block_4(std::span block) return result; } -Value Value::from_string(const char* string) +Value Value::from_string(std::string_view string) { Value result; result.m_form = FORM_STRING; - result.m_value.string = string; + result.m_value.string.begin = string.data(); + result.m_value.string.end = string.data() + string.size(); return result; } @@ -122,10 +123,10 @@ std::span Value::block() const return std::span(m_value.block.begin, m_value.block.end); } -const char* Value::string() const +std::string_view Value::string() const { CCC_ASSERT(m_form == FORM_STRING); - return m_value.string; + return std::string_view(m_value.string.begin, m_value.string.end); } // ***************************************************************************** @@ -143,7 +144,9 @@ Result> DIE::parse(std::span debug, u32 offset, u32 die.m_offset = offset; std::optional length = copy_unaligned(debug, offset); - CCC_CHECK(length.has_value(), "Cannot read length for die at 0x%x.", offset); + if (!length.has_value()) { + return 
std::optional(std::nullopt); + } die.m_length = *length; offset += sizeof(u32); @@ -153,6 +156,7 @@ Result> DIE::parse(std::span debug, u32 offset, u32 std::optional tag = copy_unaligned(debug, offset); CCC_CHECK(tag.has_value(), "Cannot read tag for die at 0x%x.", offset); + CCC_CHECK(tag_to_string(*tag), "Unknown tag 0x%hx for die at 0x%x.", *tag, offset); die.m_tag = static_cast(*tag); offset += sizeof(u16); @@ -161,18 +165,6 @@ Result> DIE::parse(std::span debug, u32 offset, u32 return std::optional(die); } -RequiredAttributes DIE::require_attributes(std::span input) -{ - RequiredAttributes output; - - for (u32 i = 0; i < static_cast(input.size()); i++) { - RequiredAttribute& attribute = output.emplace(input[i].attribute, input[i]).first->second; - attribute.index = i; - } - - return output; -} - Result> DIE::first_child() const { u32 sibling_offset = 0; @@ -221,24 +213,35 @@ Tag DIE::tag() const return m_tag; } -Result DIE::attributes(std::span output, const RequiredAttributes& required) const +Result DIE::attributes(const AttributesSpec& spec, std::vector output) const { + // Parse the attributes and save the ones specified. u32 offset = m_offset + 6; while (offset < m_offset + m_length) { Result attribute = parse_attribute(offset); CCC_RETURN_IF_ERROR(attribute); - auto iterator = required.find(attribute->attribute); - if (iterator == required.end()) { + auto iterator = spec.find(attribute->attribute); + if (iterator == spec.end()) { continue; } DIE_CHECK_ARGS(iterator->second.valid_forms & 1 << (attribute->value.form()), "Attribute %x has an unexpected form %s", attribute->attribute, form_to_string(attribute->value.form())); + CCC_ASSERT(iterator->second.index < output.size()); *output[iterator->second.index] = std::move(attribute->value); } + // Check that we have all the required attributes. 
+ for (auto& [attribute, attribute_spec] : spec) { + if (attribute_spec.required) { + CCC_ASSERT(attribute_spec.index < output.size()); + CCC_CHECK(output[attribute_spec.index]->valid(), + "Missing %s attribute for DIE at 0x%x\n", attribute_to_string(attribute), m_offset); + } + } + return Result(); } @@ -272,14 +275,8 @@ Result DIE::parse_attribute(u32& offset) const u16 attribute = *name >> 4; bool known_attribute = attribute_to_string(attribute); - if (!known_attribute) { - const char* uknown_attribute_error_message = - "Unknown attribute name 0x%03hx at 0x%x inside DIE at 0x%x."; - if ((m_importer_flags & STRICT_PARSING) == 0 && attribute >= AT_lo_user && attribute <= AT_hi_user) { - CCC_WARN(uknown_attribute_error_message, *name, offset, m_offset); - } else { - return CCC_FAILURE(uknown_attribute_error_message, *name, offset, m_offset); - } + if (!known_attribute && (m_importer_flags & STRICT_PARSING)) { + CCC_WARN("Unknown attribute name 0x%03hx at 0x%x inside DIE at 0x%x.", *name, offset, m_offset); } result.attribute = static_cast(attribute); @@ -343,10 +340,10 @@ Result DIE::parse_attribute(u32& offset) const break; } case FORM_STRING: { - const char* string = get_string(m_debug, offset); - DIE_CHECK(string, "Cannot read string attribute"); - result.value = Value::from_string(string); - offset += strlen(string) + 1; + std::optional string = get_string(m_debug, offset); + DIE_CHECK(string.has_value(), "Cannot read string attribute"); + result.value = Value::from_string(*string); + offset += static_cast(string->size()) + 1; break; } } @@ -485,7 +482,7 @@ Result SectionReader::print_attributes(FILE* out, const DIE& die) const break; } case FORM_STRING: { - fprintf(out, "\"%s\"", value.string()); + fprintf(out, "\"%s\"", value.string().data()); break; } } @@ -530,13 +527,14 @@ const char* tag_to_string(u32 tag) case TAG_set_type: return "set_type"; case TAG_subrange_type: return "subrange_type"; case TAG_with_stmt: return "with_stmt"; + case TAG_overlay: 
return "overlay"; case TAG_format_label: return "format_label"; case TAG_namelist: return "namelist"; case TAG_function_template: return "function_template"; case TAG_class_template: return "class_template"; } - return "unknown"; + return nullptr; } const char* form_to_string(u32 form) @@ -601,6 +599,8 @@ const char* attribute_to_string(u32 attribute) case AT_stride_size: return "stride_size"; case AT_upper_bound: return "upper_bound"; case AT_virtual: return "virtual"; + case AT_overlay_id: return "overlay_id"; + case AT_overlay_name: return "overlay_name"; } return nullptr; diff --git a/src/ccc/dwarf_section.h b/src/ccc/dwarf_section.h index 24e94e6..621f14f 100644 --- a/src/ccc/dwarf_section.h +++ b/src/ccc/dwarf_section.h @@ -42,13 +42,11 @@ enum Tag : u16 { TAG_set_type = 0x0020, TAG_subrange_type = 0x0021, TAG_with_stmt = 0x0022, - - /* GNU extensions */ - - TAG_format_label = 0x8000, /* for FORTRAN 77 and Fortran 90 */ - TAG_namelist = 0x8001, /* For Fortran 90 */ - TAG_function_template = 0x8002, /* for C++ */ - TAG_class_template = 0x8003 /* for C++ */ + TAG_overlay = 0x4080, + TAG_format_label = 0x8000, + TAG_namelist = 0x8001, + TAG_function_template = 0x8002, + TAG_class_template = 0x8003 }; enum Form { @@ -106,8 +104,9 @@ enum Attribute { AT_stride_size = 0x02e, AT_upper_bound = 0x02f, AT_virtual = 0x030, - AT_lo_user = 0x200, - AT_hi_user = 0x3ff + AT_mangled_name = 0x200, + AT_overlay_id = 0x229, + AT_overlay_name = 0x22a }; // The value of an attribute. @@ -128,13 +127,13 @@ class Value { static Value from_constant_8(u64 constant); static Value from_block_2(std::span block); static Value from_block_4(std::span block); - static Value from_string(const char* string); + static Value from_string(std::string_view string); // Must be null terminated. 
u32 address() const; u32 reference() const; u64 constant() const; std::span block() const; - const char* string() const; + std::string_view string() const; protected: u8 m_form = 0; @@ -146,7 +145,10 @@ class Value { const u8* begin; const u8* end; } block; - const char* string; + struct { + const char* begin; + const char* end; + } string; } m_value; }; @@ -156,13 +158,14 @@ struct AttributeTuple { Value value; }; -struct RequiredAttribute { +struct AttributeSpec { Attribute attribute; - u32 valid_forms; u32 index; + bool required; + u32 valid_forms; }; -using RequiredAttributes = std::map; +using AttributesSpec = std::map; // Represents a Debugging Information Entry. Intended to be used to // incrementally parse a .debug section. @@ -173,7 +176,43 @@ class DIE { static Result> parse(std::span debug, u32 offset, u32 importer_flags); // Generate a map of attributes to read, to be used for parsing attributes. - static RequiredAttributes require_attributes(std::span input); + static inline AttributesSpec specify_attributes(std::vector input) + { + AttributesSpec output; + + for (u32 i = 0; i < static_cast(input.size()); i++) { + AttributeSpec& attribute = output.emplace(input[i].attribute, input[i]).first->second; + attribute.index = i; + } + + return output; + } + + // Generate a specification for a required attribute. + static inline AttributeSpec required_attribute(Attribute attribute, std::vector valid_forms) + { + AttributeSpec result; + result.attribute = attribute; + result.required = true; + result.valid_forms = 0; + for (u32 form : valid_forms) { + result.valid_forms |= 1 << form; + } + return result; + } + + // Generate a specification for an optional attribute. 
+ static inline AttributeSpec optional_attribute(Attribute attribute, std::vector valid_forms) + { + AttributeSpec result; + result.attribute = attribute; + result.required = false; + result.valid_forms = 0; + for (u32 form : valid_forms) { + result.valid_forms |= 1 << form; + } + return result; + } Result> first_child() const; Result> sibling() const; @@ -182,7 +221,7 @@ class DIE { Tag tag() const; // Parse the attributes, and output the ones specified by the required parameter. - Result attributes(std::span output, const RequiredAttributes& required) const; + Result attributes(const AttributesSpec& spec, std::vector output) const; // Parse the attributes, and output them all in order. Result> all_attributes() const; diff --git a/src/ccc/elf.cpp b/src/ccc/elf.cpp index fe7c4d5..d7e4fd3 100644 --- a/src/ccc/elf.cpp +++ b/src/ccc/elf.cpp @@ -43,11 +43,11 @@ Result ElfFile::parse(std::vector image) const ElfSectionHeader* section_header = get_unaligned(elf.image, header_offset); CCC_CHECK(section_header, "ELF section header out of range."); - const char* name = get_string(elf.image, shstr_section_header->offset + section_header->name); + std::optional name = get_string(elf.image, shstr_section_header->offset + section_header->name); CCC_CHECK(name, "ELF section name out of range."); ElfSection& section = elf.sections.emplace_back(); - section.name = name; + section.name = *name; section.header = *section_header; } diff --git a/src/ccc/elf_symtab.cpp b/src/ccc/elf_symtab.cpp index 674f228..173587a 100644 --- a/src/ccc/elf_symtab.cpp +++ b/src/ccc/elf_symtab.cpp @@ -86,13 +86,14 @@ Result import_symbols( } } - const char* string = get_string(strtab, symbol->name); - CCC_CHECK(string, "Symbol string out of range."); + std::optional string_view = get_string(strtab, symbol->name); + CCC_CHECK(string_view.has_value(), "Symbol string out of range."); + std::string string(*string_view); switch (symbol->type()) { case SymbolType::NOTYPE: { Result label = 
database.labels.create_symbol( - string, group.source, group.module_symbol, address, importer_flags, demangler); + std::move(string), group.source, group.module_symbol, address, importer_flags, demangler); CCC_RETURN_IF_ERROR(label); // These symbols get emitted at the same addresses as functions @@ -108,7 +109,7 @@ Result import_symbols( case SymbolType::OBJECT: { if (symbol->size != 0) { Result global_variable = database.global_variables.create_symbol( - string, group.source, group.module_symbol, address, importer_flags, demangler); + std::move(string), group.source, group.module_symbol, address, importer_flags, demangler); CCC_RETURN_IF_ERROR(global_variable); if (*global_variable) { @@ -116,7 +117,7 @@ Result import_symbols( } } else { Result label = database.labels.create_symbol( - string, group.source, group.module_symbol, address, importer_flags, demangler); + std::move(string), group.source, group.module_symbol, address, importer_flags, demangler); CCC_RETURN_IF_ERROR(label); } @@ -124,7 +125,7 @@ Result import_symbols( } case SymbolType::FUNC: { Result function = database.functions.create_symbol( - string, group.source, group.module_symbol, address, importer_flags, demangler); + std::move(string), group.source, group.module_symbol, address, importer_flags, demangler); CCC_RETURN_IF_ERROR(function); if (*function) { @@ -135,7 +136,7 @@ Result import_symbols( } case SymbolType::FILE: { Result source_file = database.source_files.create_symbol( - string, group.source, group.module_symbol); + std::move(string), group.source, group.module_symbol); CCC_RETURN_IF_ERROR(source_file); break; @@ -160,11 +161,11 @@ Result print_symbol_table(FILE* out, std::span symtab, std::span const char* bind = symbol_bind_to_string(symbol->bind()); const char* visibility = symbol_visibility_to_string(symbol->visibility()); - const char* string = get_string(strtab, symbol->name); - CCC_CHECK(string, "Symbol string out of range."); + std::optional string = get_string(strtab, 
symbol->name); + CCC_CHECK(string.has_value(), "Symbol string out of range."); fprintf(out, "%6u: %08x %5u %-7s %-7s %-7s %3u %s\n", - i, symbol->value, symbol->size, type, bind, visibility, symbol->shndx, string); + i, symbol->value, symbol->size, type, bind, visibility, symbol->shndx, string->data()); } diff --git a/src/ccc/mdebug_importer.cpp b/src/ccc/mdebug_importer.cpp index 2007123..e326356 100644 --- a/src/ccc/mdebug_importer.cpp +++ b/src/ccc/mdebug_importer.cpp @@ -73,9 +73,7 @@ Result import_files(SymbolDatabase& database, const AnalysisContext& conte CCC_RETURN_IF_ERROR(file_count); for (s32 i = 0; i < *file_count; i++) { - if (interrupt && *interrupt) { - return CCC_FAILURE("Operation interrupted by user."); - } + CCC_CHECK(!interrupt || !*interrupt, "Operation interrupted by user."); Result file = context.reader->parse_file(i); CCC_RETURN_IF_ERROR(file); diff --git a/src/ccc/mdebug_section.cpp b/src/ccc/mdebug_section.cpp index 23ad7ad..409093f 100644 --- a/src/ccc/mdebug_section.cpp +++ b/src/ccc/mdebug_section.cpp @@ -137,9 +137,9 @@ Result SymbolTableReader::parse_file(s32 index) const s32 rel_raw_path_offset = fd_header->strings_offset + fd_header->file_path_string_offset; s32 raw_path_offset = m_hdrr->local_strings_offset + rel_raw_path_offset + m_fudge_offset; - const char* command_line_path = get_string(m_elf, raw_path_offset); - if (command_line_path) { - file.command_line_path = command_line_path; + std::optional command_line_path = get_string(m_elf, raw_path_offset); + if (command_line_path.has_value()) { + file.command_line_path = *command_line_path; } // Parse local symbols. 
@@ -364,9 +364,9 @@ static Result get_symbol(const SymbolHeader& header, std::span { Symbol symbol; - const char* string = get_string(elf, strings_offset + header.iss); - CCC_CHECK(string, "Symbol has invalid string."); - symbol.string = string; + std::optional string = get_string(elf, strings_offset + header.iss); + CCC_CHECK(string.has_value(), "Symbol has invalid string."); + symbol.string = string->data(); symbol.value = header.value; symbol.symbol_type = static_cast(header.symbol_type()); diff --git a/src/ccc/sndll.cpp b/src/ccc/sndll.cpp index 789fa0b..98d5847 100644 --- a/src/ccc/sndll.cpp +++ b/src/ccc/sndll.cpp @@ -85,10 +85,9 @@ static Result parse_sndll_common( sndll.version = version; if (common.elf_path) { - const char* elf_path = get_string(image, common.elf_path); - if (elf_path) { - sndll.elf_path = elf_path; - } + std::optional elf_path = get_string(image, common.elf_path); + CCC_CHECK(elf_path.has_value(), "SNDLL header has invalid ELF path field."); + sndll.elf_path = *elf_path; } CCC_CHECK(common.symbol_count < (32 * 1024 * 1024) / sizeof(SNDLLSymbol), "SNDLL symbol count is too high."); @@ -99,7 +98,7 @@ static Result parse_sndll_common( const SNDLLSymbolHeader* symbol_header = get_unaligned(image, symbol_offset); CCC_CHECK(symbol_header, "SNDLL symbol out of range."); - const char* string = nullptr; + std::optional string; if (symbol_header->string) { string = get_string(image, symbol_header->string - address.get_or_zero()); } @@ -107,8 +106,8 @@ static Result parse_sndll_common( SNDLLSymbol& symbol = sndll.symbols.emplace_back(); symbol.type = symbol_header->type; symbol.value = symbol_header->value; - if (string) { - symbol.string = string; + if (string.has_value()) { + symbol.string = *string; } } diff --git a/src/ccc/symbol_table.cpp b/src/ccc/symbol_table.cpp index fcfd3d8..300ff69 100644 --- a/src/ccc/symbol_table.cpp +++ b/src/ccc/symbol_table.cpp @@ -5,6 +5,7 @@ #include "elf.h" #include "elf_symtab.h" +#include "dwarf_importer.h" 
#include "dwarf_section.h" #include "mdebug_importer.h" #include "mdebug_section.h" @@ -220,7 +221,9 @@ Result DwarfSymbolTable::import( const DemanglerFunctions& demangler, const std::atomic_bool* interrupt) const { - return Result(); + dwarf::SectionReader reader(m_debug, m_line); + dwarf::SymbolTableImporter importer(database, reader, importer_flags, demangler, interrupt); + return importer.import_symbol_table(group); } Result DwarfSymbolTable::print_headers(FILE* out) const diff --git a/src/ccc/util.cpp b/src/ccc/util.cpp index a0abd61..d2f4f30 100644 --- a/src/ccc/util.cpp +++ b/src/ccc/util.cpp @@ -51,14 +51,17 @@ void set_custom_error_callback(CustomErrorCallback callback) custom_error_callback = callback; } -const char* get_string(std::span bytes, u64 offset) +std::optional get_string(std::span bytes, u64 offset) { for (u64 i = offset; i < bytes.size(); i++) { if (bytes[i] == '\0') { - return (const char*) &bytes[offset]; + return std::string_view( + reinterpret_cast(&bytes[offset]), + reinterpret_cast(&bytes[i])); } } - return nullptr; + + return std::nullopt; } std::string merge_paths(const std::string& base, const std::string& path) diff --git a/src/ccc/util.h b/src/ccc/util.h index a0cdc60..d9d0505 100644 --- a/src/ccc/util.h +++ b/src/ccc/util.h @@ -239,7 +239,7 @@ const std::optional copy_unaligned(std::span bytes, u64 offset) return value; } -const char* get_string(std::span bytes, u64 offset); +std::optional get_string(std::span bytes, u64 offset); #define CCC_BEGIN_END(x) (x).begin(), (x).end() #define CCC_ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/test/ccc/util_tests.cpp b/test/ccc/util_tests.cpp index cbce05b..9ea717e 100644 --- a/test/ccc/util_tests.cpp +++ b/test/ccc/util_tests.cpp @@ -10,7 +10,7 @@ using namespace ccc; TEST(CCCUtil, GetAligned) { - u8 data[7] = {1, 0, 0, 1, 0, 0, 1}; + alignas(8) u8 data[7] = {1, 0, 0, 1, 0, 0, 1}; EXPECT_EQ(DEREF_OR_ZERO(get_aligned(data, 0)), 0x01000001); EXPECT_EQ(get_aligned(data, 1), 
nullptr); @@ -22,7 +22,7 @@ TEST(CCCUtil, GetAligned) TEST(CCCUtil, GetUnaligned) { - u8 data[7] = {1, 2, 3, 4, 5, 6, 7}; + alignas(8) u8 data[7] = {1, 2, 3, 4, 5, 6, 7}; EXPECT_EQ(DEREF_OR_ZERO(get_unaligned(data, 0)), 1); EXPECT_EQ(DEREF_OR_ZERO(get_unaligned(data, 1)), 2); @@ -32,7 +32,7 @@ TEST(CCCUtil, GetUnaligned) TEST(CCCUtil, CopyUnaligned) { - u8 data[7] = {1, 0, 0, 1, 0, 0, 1}; + alignas(8) u8 data[7] = {1, 0, 0, 1, 0, 0, 1}; EXPECT_EQ(DEREF_OR_ZERO(copy_unaligned(data, 0)), 0x01000001); EXPECT_EQ(DEREF_OR_ZERO(copy_unaligned(data, 3)), 0x01000001); @@ -43,11 +43,11 @@ TEST(CCCUtil, CopyUnaligned) TEST(CCCUtil, GetString) { - u8 data[7] = {'h', 'e', 'l', 'l', 'o', '\0', '!'}; + alignas(8) u8 data[7] = {'h', 'e', 'l', 'l', 'o', '\0', '!'}; EXPECT_EQ(get_string(data, 0), std::string("hello")); EXPECT_EQ(get_string(data, 5), std::string("")); - EXPECT_EQ(get_string(data, 6), nullptr); - EXPECT_EQ(get_string(data, 7), nullptr); - EXPECT_EQ(get_string(data, 0xffffffffffffffff), nullptr); + EXPECT_EQ(get_string(data, 6), std::nullopt); + EXPECT_EQ(get_string(data, 7), std::nullopt); + EXPECT_EQ(get_string(data, 0xffffffffffffffff), std::nullopt); }