diff --git a/CMakeLists.txt b/CMakeLists.txt index 649e6dbb..257aec03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,37 +31,66 @@ endif() include(cmake/version_finder.cmake) -add_executable(demangle demangle.cpp) -target_link_libraries(demangle ccc demanglegnu versioninfo) - -add_executable(objdump objdump.cpp) -target_link_libraries(objdump ccc versioninfo) - -add_executable(stdump stdump.cpp) -target_link_libraries(stdump ccc versioninfo) -add_test(NAME stdump_test COMMAND stdump test ${CMAKE_SOURCE_DIR}/test) - -add_executable(uncc uncc.cpp) -target_link_libraries(uncc ccc demanglegnu versioninfo) - add_library(ccc STATIC ccc/analysis.cpp + ccc/analysis.h ccc/ast.cpp + ccc/ast.h ccc/data_refinement.cpp + ccc/data_refinement.h ccc/dependency.cpp + ccc/dependency.h ccc/elf.cpp + ccc/elf.h ccc/insn.cpp + ccc/insn.h ccc/mdebug.cpp + ccc/mdebug.h ccc/module.cpp + ccc/module.h ccc/print_cpp.cpp + ccc/print_cpp.h ccc/print_json.cpp + ccc/print_json.h ccc/registers.cpp + ccc/registers.h ccc/stabs.cpp + ccc/stabs.h + ccc/stabs_to_ast.cpp + ccc/stabs_to_ast.h ccc/symbols.cpp + ccc/symbols.h ccc/tables.cpp + ccc/tables.h ccc/util.cpp + ccc/util.h +) + +add_library(ccc_platform STATIC + platform/file.cpp + platform/file.h ) +add_executable(demangle demangle.cpp) +target_link_libraries(demangle ccc demanglegnu versioninfo) + +add_executable(objdump objdump.cpp) +target_link_libraries(objdump ccc ccc_platform versioninfo) + +add_executable(stdump stdump.cpp) +target_link_libraries(stdump ccc ccc_platform versioninfo) +add_test(NAME stdump_test COMMAND stdump test ${CMAKE_SOURCE_DIR}/test) + +add_executable(uncc uncc.cpp) +target_link_libraries(uncc ccc ccc_platform demanglegnu versioninfo) + +if(WIN32) + target_sources(demangle PRIVATE ccc.manifest) + target_sources(objdump PRIVATE ccc.manifest) + target_sources(stdump PRIVATE ccc.manifest) + target_sources(uncc PRIVATE ccc.manifest) +endif() + add_subdirectory(demanglegnu) # All the files to be included in a release. 
diff --git a/README.md b/README.md index d68cb596..8db58813 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Use of a code formatter such as `clang-format` or `astyle` on the output is reco stdump.cpp: See above. uncc.cpp: See above. ccc/analysis.cpp: Runs all the different analysis passes. - ccc/ast.cpp: Converts parsed STABS types to a C++ AST structure. + ccc/ast.cpp: Defines a C++ AST structure. ccc/data_refinement.cpp: Convert global variable data into a structured AST. ccc/dependency.cpp: Try to infer information about which types belong to which files. ccc/elf.cpp: Parses ELF files. @@ -63,10 +63,12 @@ Use of a code formatter such as `clang-format` or `astyle` on the output is reco ccc/print_json.cpp: Prints out AST nodes as JSON. ccc/registers.cpp: Enums for EE core MIPS registers. ccc/stabs.cpp: Parses STABS types. + ccc/stabs_to_ast.cpp: Converts parsed STABS types into an AST. ccc/symbols.cpp: Parses the STABS and non-STABS symbols. ccc/tables.cpp: Table of EE core MIPS instructions. ccc/util.cpp: Miscellaneous utilities. - + platform/file.cpp: Utility functions for reading files. 
+ ## Resources - [Third Eye Software and the MIPS symbol table (Peter Rowell)](http://datahedron.com/mips.html) / [in-repo mirror](docs/ThirdEyeSoftwareAndTheMIPSSymbolTable.html) / [archive.org mirror](https://web.archive.org/web/20230605005654/http://datahedron.com/mips.html) diff --git a/ccc.manifest b/ccc.manifest new file mode 100644 index 00000000..31078a67 --- /dev/null +++ b/ccc.manifest @@ -0,0 +1,9 @@ + + + + + + UTF-8 + + + diff --git a/ccc/analysis.cpp b/ccc/analysis.cpp index 484a5b4a..c5f1878e 100644 --- a/ccc/analysis.cpp +++ b/ccc/analysis.cpp @@ -1,20 +1,73 @@ #include "analysis.h" #include "elf.h" +#include "stabs_to_ast.h" namespace ccc { -static void create_function(LocalSymbolTableAnalyser& analyser, const char* name); -static void filter_ast_by_flags(ast::Node& ast_node, u32 flags); +class LocalSymbolTableAnalyser { +public: + LocalSymbolTableAnalyser(ast::SourceFile& output, StabsToAstState& stabs_to_ast_state) + : m_output(output), m_stabs_to_ast_state(stabs_to_ast_state) {} + + // Functions for processing individual symbols. + // + // In most cases these symbols will appear in the following order: + // proc + // ... line numbers ... + // end + // func + // ... parameters ... + // ... blocks ... + // + // For some compiler versions the symbols can appear in this order: + // func + // ... parameters ... + // $LM1 + // proc + // ... line numbers ... + // end + // ... blocks ... 
+ Result stab_magic(const char* magic); + Result source_file(const char* path, s32 text_address); + Result data_type(const ParsedSymbol& symbol); + Result global_variable(const char* name, s32 address, const StabsType& type, bool is_static, ast::GlobalVariableLocation location); + Result sub_source_file(const char* name, s32 text_address); + Result procedure(const char* name, s32 address, bool is_static); + Result label(const char* label, s32 address, s32 line_number); + Result text_end(const char* name, s32 function_size); + Result function(const char* name, const StabsType& return_type, s32 function_address); + Result function_end(); + Result parameter(const char* name, const StabsType& type, bool is_stack_variable, s32 offset_or_register, bool is_by_reference); + Result local_variable(const char* name, const StabsType& type, ast::VariableStorageType storage_type, s32 value, ast::GlobalVariableLocation location, bool is_static); + Result lbrac(s32 number, s32 begin_offset); + Result rbrac(s32 number, s32 end_offset); + + Result finish(); + + void create_function(const char* name); + +protected: + enum AnalysisState { + NOT_IN_FUNCTION, + IN_FUNCTION_BEGINNING, + IN_FUNCTION_END + }; + + ast::SourceFile& m_output; + StabsToAstState& m_stabs_to_ast_state; + + AnalysisState m_state = NOT_IN_FUNCTION; + ast::FunctionDefinition* m_current_function = nullptr; + ast::FunctionType* m_current_function_type = nullptr; + std::vector m_pending_variables_begin; + std::map> m_pending_variables_end; + std::string m_next_relative_path; +}; -mdebug::SymbolTable read_symbol_table(Module& mod, const fs::path& input_file) { - mod = loaders::read_elf_file(input_file); - ModuleSection* mdebug_section = mod.lookup_section(".mdebug"); - verify(mdebug_section, "No .mdebug section."); - return mdebug::parse_symbol_table(mod, *mdebug_section); -} +static void filter_ast_by_flags(ast::Node& ast_node, u32 flags); -HighSymbolTable analyse(const mdebug::SymbolTable& symbol_table, u32 flags, 
s32 file_descriptor_index) { +Result analyse(const mdebug::SymbolTable& symbol_table, u32 flags, s32 file_descriptor_index) { HighSymbolTable high; // The addresses of the global variables aren't present in the local symbol @@ -31,12 +84,14 @@ HighSymbolTable analyse(const mdebug::SymbolTable& symbol_table, u32 flags, s32 // Either analyse a specific file descriptor, or all of them. if(file_descriptor_index > -1) { - assert(file_descriptor_index < symbol_table.files.size()); - analyse_file(high, deduplicator, symbol_table, symbol_table.files[file_descriptor_index], globals, file_descriptor_index, flags); + CCC_CHECK_FATAL(file_descriptor_index < symbol_table.files.size(), "file_descriptor_index out of range."); + Result result = analyse_file(high, deduplicator, symbol_table, symbol_table.files[file_descriptor_index], globals, file_descriptor_index, flags); + CCC_RETURN_IF_ERROR(result); } else { for(s32 i = 0; i < (s32) symbol_table.files.size(); i++) { const mdebug::SymFileDescriptor& fd = symbol_table.files[i]; - analyse_file(high, deduplicator, symbol_table, fd, globals, i, flags); + Result result = analyse_file(high, deduplicator, symbol_table, fd, globals, i, flags); + CCC_RETURN_IF_ERROR(result); } } @@ -65,25 +120,27 @@ HighSymbolTable analyse(const mdebug::SymbolTable& symbol_table, u32 flags, s32 return high; } -void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicator, const mdebug::SymbolTable& symbol_table, const mdebug::SymFileDescriptor& fd, const std::map& globals, s32 file_index, u32 flags) { +Result analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicator, const mdebug::SymbolTable& symbol_table, const mdebug::SymFileDescriptor& fd, const std::map& globals, s32 file_index, u32 flags) { auto file = std::make_unique(); file->full_path = fd.full_path; file->is_windows_path = fd.is_windows_path; // Parse the stab strings into a data structure that's vaguely // one-to-one with the text-based representation. 
- file->symbols = parse_symbols(fd.symbols, fd.detected_language); + Result> symbols = parse_symbols(fd.symbols, fd.detected_language); + CCC_RETURN_IF_ERROR(symbols); + file->symbols = std::move(*symbols); // In stabs, types can be referenced by their number from other stabs, // so here we build a map of type numbers to the parsed types. - std::map stabs_types; + std::map stabs_types; for(const ParsedSymbol& symbol : file->symbols) { if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) { symbol.name_colon_type.type->enumerate_numbered_types(stabs_types); } } - ast::StabsToAstState stabs_to_ast_state; + StabsToAstState stabs_to_ast_state; stabs_to_ast_state.file_index = file_index; stabs_to_ast_state.stabs_types = &stabs_types; @@ -97,7 +154,8 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat case StabsSymbolDescriptor::GLOBAL_FUNCTION: { const char* name = symbol.name_colon_type.name.c_str(); const StabsType& type = *symbol.name_colon_type.type.get(); - analyser.function(name, type, symbol.raw->value); + Result result = analyser.function(name, type, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); break; } case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: @@ -109,7 +167,8 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat bool is_stack_variable = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::VALUE_PARAMETER; bool is_by_reference = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_A || symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_V; - analyser.parameter(name, type, is_stack_variable, symbol.raw->value, is_by_reference); + Result result = analyser.parameter(name, type, is_stack_variable, symbol.raw->value, is_by_reference); + CCC_RETURN_IF_ERROR(result); break; } case StabsSymbolDescriptor::REGISTER_VARIABLE: @@ -123,21 +182,24 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat bool 
is_static = false; if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) { storage_type = ast::VariableStorageType::GLOBAL; - location = symbol_class_to_global_variable_location(symbol.raw->storage_class); + std::optional location_opt = symbol_class_to_global_variable_location(symbol.raw->storage_class); + CCC_CHECK(location_opt.has_value(), "Invalid static local variable location."); + location = *location_opt; is_static = true; } else if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REGISTER_VARIABLE) { storage_type = ast::VariableStorageType::REGISTER; } else { storage_type = ast::VariableStorageType::STACK; } - analyser.local_variable(name, type, storage_type, symbol.raw->value, location, is_static); + Result result = analyser.local_variable(name, type, storage_type, symbol.raw->value, location, is_static); + CCC_RETURN_IF_ERROR(result); break; } case StabsSymbolDescriptor::GLOBAL_VARIABLE: case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: { const char* name = symbol.name_colon_type.name.c_str(); s32 address = -1; - ast::GlobalVariableLocation location = symbol_class_to_global_variable_location(symbol.raw->storage_class); + std::optional location = symbol_class_to_global_variable_location(symbol.raw->storage_class); if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::GLOBAL_VARIABLE) { // The address for non-static global variables is // only stored in the external symbol table (and @@ -153,49 +215,61 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat // in the local symbol table. 
address = symbol.raw->value; } + CCC_CHECK(location.has_value(), "Invalid global variable location."); const StabsType& type = *symbol.name_colon_type.type.get(); bool is_static = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE; - analyser.global_variable(name, address, type, is_static, location); + Result result = analyser.global_variable(name, address, type, is_static, *location); + CCC_RETURN_IF_ERROR(result); break; } case StabsSymbolDescriptor::TYPE_NAME: case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: { - analyser.data_type(symbol); + Result result = analyser.data_type(symbol); + CCC_RETURN_IF_ERROR(result); break; } } break; } case ParsedSymbolType::SOURCE_FILE: { - analyser.source_file(symbol.raw->string, symbol.raw->value); + Result result = analyser.source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); break; } case ParsedSymbolType::SUB_SOURCE_FILE: { - analyser.sub_source_file(symbol.raw->string, symbol.raw->value); + Result result = analyser.sub_source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); break; } case ParsedSymbolType::LBRAC: { - analyser.lbrac(symbol.lrbrac.number, symbol.raw->value); + Result result = analyser.lbrac(symbol.lrbrac.number, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); break; } case ParsedSymbolType::RBRAC: { - analyser.rbrac(symbol.lrbrac.number, symbol.raw->value); + Result result = analyser.rbrac(symbol.lrbrac.number, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); break; } case ParsedSymbolType::FUNCTION_END: { - analyser.function_end(); + Result result = analyser.function_end(); + CCC_RETURN_IF_ERROR(result); break; } case ParsedSymbolType::NON_STABS: { if(symbol.raw->storage_class == mdebug::SymbolClass::TEXT) { if(symbol.raw->storage_type == mdebug::SymbolType::PROC) { - analyser.procedure(symbol.raw->string, symbol.raw->value, false); + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, 
false); + CCC_RETURN_IF_ERROR(result); } else if(symbol.raw->storage_type == mdebug::SymbolType::STATICPROC) { - analyser.procedure(symbol.raw->string, symbol.raw->value, true); + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, true); + CCC_RETURN_IF_ERROR(result); } else if(symbol.raw->storage_type == mdebug::SymbolType::LABEL) { - analyser.label(symbol.raw->string, symbol.raw->value, symbol.raw->index); + Result result = analyser.label(symbol.raw->string, symbol.raw->value, symbol.raw->index); + CCC_RETURN_IF_ERROR(result); } else if(symbol.raw->storage_type == mdebug::SymbolType::END) { - analyser.text_end(symbol.raw->string, symbol.raw->value); + Result result = analyser.text_end(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); } } break; @@ -203,7 +277,8 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat } } - analyser.finish(); + Result result = analyser.finish(); + CCC_RETURN_IF_ERROR(result); // The STABS types are no longer needed, so delete them now. 
for(ParsedSymbol& symbol : file->symbols) { @@ -229,43 +304,51 @@ void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicat if(flags & DEDUPLICATE_TYPES) { deduplicator.process_file(*high.source_files.back().get(), file_index, high.source_files); } + + return Result(); } -ast::GlobalVariableLocation symbol_class_to_global_variable_location(mdebug::SymbolClass symbol_class) { +std::optional symbol_class_to_global_variable_location(mdebug::SymbolClass symbol_class) { + std::optional location; switch(symbol_class) { - case mdebug::SymbolClass::NIL: return ast::GlobalVariableLocation::NIL; - case mdebug::SymbolClass::DATA: return ast::GlobalVariableLocation::DATA; - case mdebug::SymbolClass::BSS: return ast::GlobalVariableLocation::BSS; - case mdebug::SymbolClass::ABS: return ast::GlobalVariableLocation::ABS; - case mdebug::SymbolClass::SDATA: return ast::GlobalVariableLocation::SDATA; - case mdebug::SymbolClass::SBSS: return ast::GlobalVariableLocation::SBSS; - case mdebug::SymbolClass::RDATA: return ast::GlobalVariableLocation::RDATA; - case mdebug::SymbolClass::COMMON: return ast::GlobalVariableLocation::COMMON; - case mdebug::SymbolClass::SCOMMON: return ast::GlobalVariableLocation::SCOMMON; + case mdebug::SymbolClass::NIL: location = ast::GlobalVariableLocation::NIL; break; + case mdebug::SymbolClass::DATA: location = ast::GlobalVariableLocation::DATA; break; + case mdebug::SymbolClass::BSS: location = ast::GlobalVariableLocation::BSS; break; + case mdebug::SymbolClass::ABS: location = ast::GlobalVariableLocation::ABS; break; + case mdebug::SymbolClass::SDATA: location = ast::GlobalVariableLocation::SDATA; break; + case mdebug::SymbolClass::SBSS: location = ast::GlobalVariableLocation::SBSS; break; + case mdebug::SymbolClass::RDATA: location = ast::GlobalVariableLocation::RDATA; break; + case mdebug::SymbolClass::COMMON: location = ast::GlobalVariableLocation::COMMON; break; + case mdebug::SymbolClass::SCOMMON: location = 
ast::GlobalVariableLocation::SCOMMON; break; default: {} } - verify_not_reached("Bad variable storage location '%s'.", mdebug::symbol_class(symbol_class)); + return location; } -void LocalSymbolTableAnalyser::stab_magic(const char* magic) { - +Result LocalSymbolTableAnalyser::stab_magic(const char* magic) { + return Result(); } -void LocalSymbolTableAnalyser::source_file(const char* path, s32 text_address) { - output.relative_path = path; - output.text_address = text_address; - if(next_relative_path.empty()) { - next_relative_path = output.relative_path; +Result LocalSymbolTableAnalyser::source_file(const char* path, s32 text_address) { + m_output.relative_path = path; + m_output.text_address = text_address; + if(m_next_relative_path.empty()) { + m_next_relative_path = m_output.relative_path; } + + return Result(); } -void LocalSymbolTableAnalyser::data_type(const ParsedSymbol& symbol) { - std::unique_ptr node = ast::stabs_symbol_to_ast(symbol, stabs_to_ast_state); - node->stabs_type_number = symbol.name_colon_type.type->type_number; - output.data_types.emplace_back(std::move(node)); +Result LocalSymbolTableAnalyser::data_type(const ParsedSymbol& symbol) { + Result> node = stabs_symbol_to_ast(symbol, m_stabs_to_ast_state); + CCC_RETURN_IF_ERROR(node); + (*node)->stabs_type_number = symbol.name_colon_type.type->type_number; + m_output.data_types.emplace_back(std::move(*node)); + + return Result(); } -void LocalSymbolTableAnalyser::global_variable(const char* name, s32 address, const StabsType& type, bool is_static, ast::GlobalVariableLocation location) { +Result LocalSymbolTableAnalyser::global_variable(const char* name, s32 address, const StabsType& type, bool is_static, ast::GlobalVariableLocation location) { std::unique_ptr global = std::make_unique(); global->name = name; if(is_static) { @@ -275,68 +358,82 @@ void LocalSymbolTableAnalyser::global_variable(const char* name, s32 address, co global->storage.type = ast::VariableStorageType::GLOBAL; 
global->storage.global_location = location; global->storage.global_address = address; - global->type = ast::stabs_type_to_ast_no_throw(type, stabs_to_ast_state, 0, 0, true, false); - output.globals.emplace_back(std::move(global)); + global->type = stabs_type_to_ast_and_handle_errors(type, m_stabs_to_ast_state, 0, 0, true, false); + m_output.globals.emplace_back(std::move(global)); + + return Result(); } -void LocalSymbolTableAnalyser::sub_source_file(const char* path, s32 text_address) { - if(current_function && state == IN_FUNCTION_BEGINNING) { - ast::SubSourceFile& sub = current_function->sub_source_files.emplace_back(); +Result LocalSymbolTableAnalyser::sub_source_file(const char* path, s32 text_address) { + if(m_current_function && m_state == IN_FUNCTION_BEGINNING) { + ast::SubSourceFile& sub = m_current_function->sub_source_files.emplace_back(); sub.address = text_address; sub.relative_path = path; } else { - next_relative_path = path; + m_next_relative_path = path; } + + return Result(); } -void LocalSymbolTableAnalyser::procedure(const char* name, s32 address, bool is_static) { - if(!current_function || strcmp(name, current_function->name.c_str())) { - create_function(*this, name); +Result LocalSymbolTableAnalyser::procedure(const char* name, s32 address, bool is_static) { + if(!m_current_function || strcmp(name, m_current_function->name.c_str())) { + create_function(name); } - current_function->address_range.low = address; + m_current_function->address_range.low = address; if(is_static) { - current_function->storage_class = ast::SC_STATIC; + m_current_function->storage_class = ast::SC_STATIC; } - pending_variables_begin.clear(); - pending_variables_end.clear(); + m_pending_variables_begin.clear(); + m_pending_variables_end.clear(); + + return Result(); } -void LocalSymbolTableAnalyser::label(const char* label, s32 address, s32 line_number) { - if(address > -1 && current_function && label[0] == '$') { - assert(address < 256 * 1024 * 1024); - 
ast::LineNumberPair& pair = current_function->line_numbers.emplace_back(); +Result LocalSymbolTableAnalyser::label(const char* label, s32 address, s32 line_number) { + if(address > -1 && m_current_function && label[0] == '$') { + CCC_ASSERT(address < 256 * 1024 * 1024); + ast::LineNumberPair& pair = m_current_function->line_numbers.emplace_back(); pair.address = address; pair.line_number = line_number; } + + return Result(); } -void LocalSymbolTableAnalyser::text_end(const char* name, s32 function_size) { - if(state == IN_FUNCTION_BEGINNING) { - if(current_function->address_range.low >= 0) { - assert(current_function); - current_function->address_range.high = current_function->address_range.low + function_size; +Result LocalSymbolTableAnalyser::text_end(const char* name, s32 function_size) { + if(m_state == IN_FUNCTION_BEGINNING) { + CCC_ASSERT(m_current_function); // Checked before the pointer is dereferenced below. + if(m_current_function->address_range.low >= 0) { + m_current_function->address_range.high = m_current_function->address_range.low + function_size; } - state = IN_FUNCTION_END; + m_state = IN_FUNCTION_END; } + + return Result(); } -void LocalSymbolTableAnalyser::function(const char* name, const StabsType& return_type, s32 function_address) { - if(!current_function || strcmp(name, current_function->name.c_str())) { - create_function(*this, name); +Result LocalSymbolTableAnalyser::function(const char* name, const StabsType& return_type, s32 function_address) { + if(!m_current_function || strcmp(name, m_current_function->name.c_str())) { + create_function(name); } - current_function_type->return_type = ast::stabs_type_to_ast_no_throw(return_type, stabs_to_ast_state, 0, 0, true, true); + m_current_function_type->return_type = stabs_type_to_ast_and_handle_errors(return_type, m_stabs_to_ast_state, 0, 0, true, true); + + return Result(); } -void LocalSymbolTableAnalyser::function_end() { - current_function = nullptr; - current_function_type = nullptr; +Result LocalSymbolTableAnalyser::function_end() { + 
m_current_function = nullptr; + m_current_function_type = nullptr; + + return Result(); } -void LocalSymbolTableAnalyser::parameter(const char* name, const StabsType& type, bool is_stack_variable, s32 offset_or_register, bool is_by_reference) { - assert(current_function_type); +Result LocalSymbolTableAnalyser::parameter(const char* name, const StabsType& type, bool is_stack_variable, s32 offset_or_register, bool is_by_reference) { + CCC_ASSERT(m_current_function_type); std::unique_ptr parameter = std::make_unique(); parameter->name = name; parameter->variable_class = ast::VariableClass::PARAMETER; @@ -350,16 +447,18 @@ void LocalSymbolTableAnalyser::parameter(const char* name, const StabsType& type mips::map_dbx_register_index(parameter->storage.dbx_register_number); parameter->storage.is_by_reference = is_by_reference; } - parameter->type = ast::stabs_type_to_ast_no_throw(type, stabs_to_ast_state, 0, 0, true, true); - current_function_type->parameters->emplace_back(std::move(parameter)); + parameter->type = stabs_type_to_ast_and_handle_errors(type, m_stabs_to_ast_state, 0, 0, true, true); + m_current_function_type->parameters->emplace_back(std::move(parameter)); + + return Result(); } -void LocalSymbolTableAnalyser::local_variable(const char* name, const StabsType& type, ast::VariableStorageType storage_type, s32 value, ast::GlobalVariableLocation location, bool is_static) { - if(!current_function) { - return; +Result LocalSymbolTableAnalyser::local_variable(const char* name, const StabsType& type, ast::VariableStorageType storage_type, s32 value, ast::GlobalVariableLocation location, bool is_static) { + if(!m_current_function) { + return Result(); } std::unique_ptr local = std::make_unique(); - pending_variables_begin.emplace_back(local.get()); + m_pending_variables_begin.emplace_back(local.get()); local->name = name; if(is_static) { local->storage_class = ast::SC_STATIC; @@ -383,47 +482,56 @@ void LocalSymbolTableAnalyser::local_variable(const char* name, const 
StabsType& break; } } - local->type = ast::stabs_type_to_ast_no_throw(type, stabs_to_ast_state, 0, 0, true, false); - current_function->locals.emplace_back(std::move(local)); + local->type = stabs_type_to_ast_and_handle_errors(type, m_stabs_to_ast_state, 0, 0, true, false); + m_current_function->locals.emplace_back(std::move(local)); + + return Result(); } -void LocalSymbolTableAnalyser::lbrac(s32 number, s32 begin_offset) { - auto& pending_end = pending_variables_end[number]; - for(ast::Variable* variable : pending_variables_begin) { +Result LocalSymbolTableAnalyser::lbrac(s32 number, s32 begin_offset) { + auto& pending_end = m_pending_variables_end[number]; + for(ast::Variable* variable : m_pending_variables_begin) { pending_end.emplace_back(variable); - variable->block.low = output.text_address + begin_offset; + variable->block.low = m_output.text_address + begin_offset; } - pending_variables_begin.clear(); + m_pending_variables_begin.clear(); + + return Result(); } -void LocalSymbolTableAnalyser::rbrac(s32 number, s32 end_offset) { - auto variables = pending_variables_end.find(number); - verify(variables != pending_variables_end.end(), "N_RBRAC symbol without a matching N_LBRAC symbol."); +Result LocalSymbolTableAnalyser::rbrac(s32 number, s32 end_offset) { + auto variables = m_pending_variables_end.find(number); + CCC_CHECK(variables != m_pending_variables_end.end(), "N_RBRAC symbol without a matching N_LBRAC symbol."); + for(ast::Variable* variable : variables->second) { - variable->block.high = output.text_address + end_offset; + variable->block.high = m_output.text_address + end_offset; } + + return Result(); } -void LocalSymbolTableAnalyser::finish() { - verify(state != IN_FUNCTION_BEGINNING, - "Unexpected end of symbol table for '%s'.", output.full_path.c_str()); +Result LocalSymbolTableAnalyser::finish() { + CCC_CHECK(m_state != IN_FUNCTION_BEGINNING, + "Unexpected end of symbol table for '%s'.", m_output.full_path.c_str()); + + return Result(); } 
-static void create_function(LocalSymbolTableAnalyser& analyser, const char* name) { +void LocalSymbolTableAnalyser::create_function(const char* name) { std::unique_ptr ptr = std::make_unique(); - analyser.current_function = ptr.get(); - analyser.output.functions.emplace_back(std::move(ptr)); - analyser.current_function->name = name; - analyser.state = LocalSymbolTableAnalyser::IN_FUNCTION_BEGINNING; + m_current_function = ptr.get(); + m_output.functions.emplace_back(std::move(ptr)); + m_current_function->name = name; + m_state = LocalSymbolTableAnalyser::IN_FUNCTION_BEGINNING; - if(!analyser.next_relative_path.empty() && analyser.current_function->relative_path != analyser.output.relative_path) { - analyser.current_function->relative_path = analyser.next_relative_path; + if(!m_next_relative_path.empty() && m_current_function->relative_path != m_output.relative_path) { + m_current_function->relative_path = m_next_relative_path; } std::unique_ptr function_type = std::make_unique(); - analyser.current_function_type = function_type.get(); - analyser.current_function_type->parameters.emplace(); - analyser.current_function->type = std::move(function_type); + m_current_function_type = function_type.get(); + m_current_function_type->parameters.emplace(); + m_current_function->type = std::move(function_type); } static void filter_ast_by_flags(ast::Node& ast_node, u32 flags) { @@ -536,7 +644,7 @@ void compute_size_bytes_recursive(ast::Node& node, const HighSymbolTable& high) } case ast::TYPE_NAME: { ast::TypeName& type_name = node.as(); - if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number > -1) { + if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number.type > -1) { const ast::SourceFile& source_file = *high.source_files[type_name.referenced_file_index].get(); auto type_index = source_file.stabs_type_number_to_deduplicated_type_index.find(type_name.referenced_stabs_type_number); if(type_index != 
source_file.stabs_type_number_to_deduplicated_type_index.end()) { diff --git a/ccc/analysis.h b/ccc/analysis.h index e6ced185..b6a3db17 100644 --- a/ccc/analysis.h +++ b/ccc/analysis.h @@ -25,66 +25,9 @@ enum AnalysisFlags { STRIP_GENERATED_FUNCTIONS = (1 << 4) }; -mdebug::SymbolTable read_symbol_table(Module& mod, const fs::path& input_file); -HighSymbolTable analyse(const mdebug::SymbolTable& symbol_table, u32 flags, s32 file_descriptor_index = -1); -void analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicator, const mdebug::SymbolTable& symbol_table, const mdebug::SymFileDescriptor& fd, const std::map& globals, s32 file_index, u32 flags); -ast::GlobalVariableLocation symbol_class_to_global_variable_location(mdebug::SymbolClass symbol_class); - -struct LocalSymbolTableAnalyser { - ast::SourceFile& output; - ast::StabsToAstState& stabs_to_ast_state; - - LocalSymbolTableAnalyser(ast::SourceFile& o, ast::StabsToAstState& s) - : output(o), stabs_to_ast_state(s) {} - - enum AnalysisState { - NOT_IN_FUNCTION, - IN_FUNCTION_BEGINNING, - IN_FUNCTION_END - }; - - AnalysisState state = NOT_IN_FUNCTION; - ast::FunctionDefinition* current_function = nullptr; - ast::FunctionType* current_function_type = nullptr; - std::vector pending_variables_begin; - std::map> pending_variables_end; - std::string next_relative_path; - - // Functions for processing individual symbols. - // - // In most cases these symbols will appear in the following order: - // proc - // ... line numbers ... - // end - // func - // ... parameters ... - // ... blocks ... - // - // For some compiler versions the symbols can appear in this order: - // func - // ... parameters ... - // $LM1 - // proc - // ... line numbers ... - // end - // ... blocks ... 
- void stab_magic(const char* magic); - void source_file(const char* path, s32 text_address); - void data_type(const ParsedSymbol& symbol); - void global_variable(const char* name, s32 address, const StabsType& type, bool is_static, ast::GlobalVariableLocation location); - void sub_source_file(const char* name, s32 text_address); - void procedure(const char* name, s32 address, bool is_static); - void label(const char* label, s32 address, s32 line_number); - void text_end(const char* name, s32 function_size); - void function(const char* name, const StabsType& return_type, s32 function_address); - void function_end(); - void parameter(const char* name, const StabsType& type, bool is_stack_variable, s32 offset_or_register, bool is_by_reference); - void local_variable(const char* name, const StabsType& type, ast::VariableStorageType storage_type, s32 value, ast::GlobalVariableLocation location, bool is_static); - void lbrac(s32 number, s32 begin_offset); - void rbrac(s32 number, s32 end_offset); - - void finish(); -}; +Result analyse(const mdebug::SymbolTable& symbol_table, u32 flags, s32 file_descriptor_index = -1); +Result analyse_file(HighSymbolTable& high, ast::TypeDeduplicatorOMatic& deduplicator, const mdebug::SymbolTable& symbol_table, const mdebug::SymFileDescriptor& fd, const std::map& globals, s32 file_index, u32 flags); +std::optional symbol_class_to_global_variable_location(mdebug::SymbolClass symbol_class); void compute_size_bytes_recursive(ast::Node& node, const HighSymbolTable& high); void fill_in_pointers_to_member_function_definitions(HighSymbolTable& high); diff --git a/ccc/ast.cpp b/ccc/ast.cpp index 01548225..9a67ae84 100644 --- a/ccc/ast.cpp +++ b/ccc/ast.cpp @@ -2,388 +2,9 @@ namespace ccc::ast { -#define AST_DEBUG(...) //__VA_ARGS__ -#define AST_DEBUG_PRINTF(...) 
AST_DEBUG(printf(__VA_ARGS__);) - -static bool detect_bitfield(const StabsField& field, const StabsToAstState& state); static bool compare_nodes_and_merge(CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const TypeLookupInfo& lookup); static void try_to_match_wobbly_typedefs(CompareResult& result, const Node& node_lhs, const Node& node_rhs, const TypeLookupInfo& lookup); -std::unique_ptr stabs_symbol_to_ast(const ParsedSymbol& symbol, const StabsToAstState& state) { - AST_DEBUG_PRINTF("ANALYSING %s\n", symbol.raw->string); - auto node = stabs_type_to_ast_no_throw(*symbol.name_colon_type.type.get(), state, 0, 0, false, false); - node->name = (symbol.name_colon_type.name == " ") ? "" : symbol.name_colon_type.name; - node->symbol = &symbol; - if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME) { - node->storage_class = SC_TYPEDEF; - } - return node; -} - -std::unique_ptr stabs_type_to_ast_no_throw(const StabsType& type, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute) { - try { - return stabs_type_to_ast(type, state, absolute_parent_offset_bytes, depth, substitute_type_name, false); - } catch(std::runtime_error& e) { - auto error = std::make_unique(); - error->source = TypeNameSource::ERROR; - error->type_name = e.what(); - return error; - } -} - -std::unique_ptr stabs_type_to_ast(const StabsType& type, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute) { - AST_DEBUG_PRINTF("%-*stype desc=%hhx '%c' num=%d name=%s\n", - depth * 4, "", - (u8) type.descriptor, - isprint((u8) type.descriptor) ? (u8) type.descriptor : '!', - type.type_number, - type.name.has_value() ? type.name->c_str() : ""); - - if(depth > 200) { - throw std::runtime_error("CCC_BADRECURSION"); - } - - // This makes sure that types are replaced with their type name in cases - // where that would be more appropriate. 
- if(type.name.has_value()) { - bool try_substitute = depth > 0 && (type.is_root - || type.descriptor == StabsTypeDescriptor::RANGE - || type.descriptor == StabsTypeDescriptor::BUILTIN); - bool is_name_empty = type.name == "" || type.name == " "; - // Unfortunately, a common case seems to be that __builtin_va_list is - // indistinguishable from void*, so we prevent it from being output to - // avoid confusion. - bool is_va_list = type.name == "__builtin_va_list"; - if((substitute_type_name || try_substitute) && !is_name_empty && !is_va_list) { - auto type_name = std::make_unique(); - type_name->source = TypeNameSource::REFERENCE; - type_name->type_name = *type.name; - type_name->referenced_file_index = state.file_index; - type_name->referenced_stabs_type_number = type.type_number; - return type_name; - } - } - - // This prevents infinite recursion when an automatically generated member - // function references an unnamed type. - if(force_substitute) { - const char* type_string = nullptr; - if(type.descriptor == StabsTypeDescriptor::ENUM) type_string = "__unnamed_enum"; - if(type.descriptor == StabsTypeDescriptor::STRUCT) type_string = "__unnamed_struct"; - if(type.descriptor == StabsTypeDescriptor::UNION) type_string = "__unnamed_union"; - if(type_string) { - auto type_name = std::make_unique(); - type_name->source = TypeNameSource::REFERENCE; - type_name->type_name = type_string; - type_name->referenced_file_index = state.file_index; - type_name->referenced_stabs_type_number = type.type_number; - return type_name; - } - } - - if(!type.has_body) { - // The definition of the type has been defined previously, so we have to - // look it up by its type number. 
- auto stabs_type = state.stabs_types->find(type.type_number); - if(type.anonymous || stabs_type == state.stabs_types->end()) { - auto type_name = std::make_unique(); - type_name->source = TypeNameSource::ERROR; - type_name->type_name = stringf("CCC_BADTYPELOOKUP(%d)", type.type_number); - return type_name; - } - return stabs_type_to_ast(*stabs_type->second, state, absolute_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); - } - - std::unique_ptr result; - - switch(type.descriptor) { - case StabsTypeDescriptor::TYPE_REFERENCE: { - const auto& stabs_type_ref = type.as(); - if(type.anonymous | stabs_type_ref.type->anonymous || stabs_type_ref.type->type_number != type.type_number) { - result = stabs_type_to_ast(*stabs_type_ref.type, state, absolute_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); - } else { - // I still don't know why in STABS void is a reference to - // itself, maybe because I'm not a philosopher. - auto type_name = std::make_unique(); - type_name->source = TypeNameSource::REFERENCE; - type_name->type_name = "void"; - result = std::move(type_name); - } - break; - } - case StabsTypeDescriptor::ARRAY: { - auto array = std::make_unique(); - const auto& stabs_array = type.as(); - array->element_type = stabs_type_to_ast(*stabs_array.element_type, state, absolute_parent_offset_bytes, depth + 1, true, force_substitute); - const auto& index = stabs_array.index_type->as(); - // The low and high values are not wrong in this case. 
- verify(index.low_maybe_wrong == 0, "Invalid index type for array."); - array->element_count = index.high_maybe_wrong + 1; - result = std::move(array); - break; - } - case StabsTypeDescriptor::ENUM: { - auto inline_enum = std::make_unique(); - const auto& stabs_enum = type.as(); - inline_enum->constants = stabs_enum.fields; - result = std::move(inline_enum); - break; - } - case StabsTypeDescriptor::FUNCTION: { - auto function = std::make_unique(); - function->return_type = stabs_type_to_ast(*type.as().return_type, state, absolute_parent_offset_bytes, depth + 1, true, force_substitute); - result = std::move(function); - break; - } - case StabsTypeDescriptor::VOLATILE_QUALIFIER: { - const auto& volatile_qualifier = type.as(); - result = stabs_type_to_ast(*volatile_qualifier.type.get(), state, absolute_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); - result->is_volatile = true; - break; - } - case StabsTypeDescriptor::CONST_QUALIFIER: { - const auto& const_qualifier = type.as(); - result = stabs_type_to_ast(*const_qualifier.type.get(), state, absolute_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); - result->is_const = true; - break; - } - case StabsTypeDescriptor::RANGE: { - auto builtin = std::make_unique(); - builtin->bclass = type.as().range_class; - result = std::move(builtin); - break; - } - case StabsTypeDescriptor::STRUCT: - case StabsTypeDescriptor::UNION: { - const StabsStructOrUnionType* stabs_struct_or_union; - if(type.descriptor == StabsTypeDescriptor::STRUCT) { - stabs_struct_or_union = &type.as(); - } else { - stabs_struct_or_union = &type.as(); - } - auto struct_or_union = std::make_unique(); - struct_or_union->is_struct = type.descriptor == StabsTypeDescriptor::STRUCT; - struct_or_union->size_bits = (s32) stabs_struct_or_union->size * 8; - for(const StabsBaseClass& stabs_base_class : stabs_struct_or_union->base_classes) { - std::unique_ptr base_class = stabs_type_to_ast(*stabs_base_class.type, state, 
absolute_parent_offset_bytes, depth + 1, true, force_substitute); - base_class->is_base_class = true; - base_class->absolute_offset_bytes = stabs_base_class.offset; - base_class->access_specifier = stabs_field_visibility_to_access_specifier(stabs_base_class.visibility); - struct_or_union->base_classes.emplace_back(std::move(base_class)); - } - AST_DEBUG_PRINTF("%-*s beginfields\n", depth * 4, ""); - for(const StabsField& field : stabs_struct_or_union->fields) { - auto node = stabs_field_to_ast(field, state, absolute_parent_offset_bytes, depth); - struct_or_union->fields.emplace_back(std::move(node)); - } - AST_DEBUG_PRINTF("%-*s endfields\n", depth * 4, ""); - AST_DEBUG_PRINTF("%-*s beginmemberfuncs\n", depth * 4, ""); - std::string struct_or_union_name_no_template_parameters; - if(type.name.has_value()) { - struct_or_union_name_no_template_parameters = - type.name->substr(0, type.name->find("<")); - } - for(const StabsMemberFunctionSet& function_set : stabs_struct_or_union->member_functions) { - for(const StabsMemberFunction& stabs_func : function_set.overloads) { - auto node = stabs_type_to_ast(*stabs_func.type, state, absolute_parent_offset_bytes, depth + 1, true, true); - if(function_set.name == "__as") { - node->name = "operator="; - } else { - node->name = function_set.name; - } - if(node->descriptor == FUNCTION_TYPE) { - FunctionType& function = node->as(); - function.modifier = stabs_func.modifier; - function.is_constructor = false; - if(type.name.has_value()) { - function.is_constructor |= function_set.name == type.name; - function.is_constructor |= function_set.name == struct_or_union_name_no_template_parameters; - } - function.vtable_index = stabs_func.vtable_index; - } - node->access_specifier = stabs_field_visibility_to_access_specifier(stabs_func.visibility); - struct_or_union->member_functions.emplace_back(std::move(node)); - } - } - AST_DEBUG_PRINTF("%-*s endmemberfuncs\n", depth * 4, ""); - result = std::move(struct_or_union); - break; - } - case 
StabsTypeDescriptor::CROSS_REFERENCE: { - auto type_name = std::make_unique(); - type_name->source = TypeNameSource::CROSS_REFERENCE; - type_name->type_name = type.as().identifier; - result = std::move(type_name); - break; - } - case ccc::StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { - const auto& fp_builtin = type.as(); - auto builtin = std::make_unique(); - switch(fp_builtin.bytes) { - case 1: builtin->bclass = BuiltInClass::UNSIGNED_8; break; - case 2: builtin->bclass = BuiltInClass::UNSIGNED_16; break; - case 4: builtin->bclass = BuiltInClass::UNSIGNED_32; break; - case 8: builtin->bclass = BuiltInClass::UNSIGNED_64; break; - case 16: builtin->bclass = BuiltInClass::UNSIGNED_128; break; - default: builtin->bclass = BuiltInClass::UNSIGNED_8; break; - } - result = std::move(builtin); - break; - } - case StabsTypeDescriptor::METHOD: { - const auto& stabs_method = type.as(); - auto function = std::make_unique(); - function->return_type = stabs_type_to_ast(*stabs_method.return_type.get(), state, absolute_parent_offset_bytes, depth + 1, true, true); - function->parameters.emplace(); - for(const std::unique_ptr& parameter_type : stabs_method.parameter_types) { - auto node = stabs_type_to_ast(*parameter_type, state, absolute_parent_offset_bytes, depth + 1, true, true); - function->parameters->emplace_back(std::move(node)); - } - result = std::move(function); - break; - } - case StabsTypeDescriptor::POINTER: { - auto pointer = std::make_unique(); - pointer->value_type = stabs_type_to_ast(*type.as().value_type, state, absolute_parent_offset_bytes, depth + 1, true, force_substitute); - result = std::move(pointer); - break; - } - case StabsTypeDescriptor::REFERENCE: { - auto reference = std::make_unique(); - reference->value_type = stabs_type_to_ast(*type.as().value_type.get(), state, absolute_parent_offset_bytes, depth + 1, true, force_substitute); - result = std::move(reference); - break; - } - case StabsTypeDescriptor::TYPE_ATTRIBUTE: { - const auto& 
stabs_type_attribute = type.as(); - result = stabs_type_to_ast(*stabs_type_attribute.type, state, absolute_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); - result->size_bits = stabs_type_attribute.size_bits; - break; - } - case StabsTypeDescriptor::POINTER_TO_NON_STATIC_MEMBER: { - const auto& stabs_member_pointer = type.as(); - auto member_pointer = std::make_unique(); - member_pointer->class_type = stabs_type_to_ast(*stabs_member_pointer.class_type.get(), state, absolute_parent_offset_bytes, depth + 1, true, true); - member_pointer->member_type = stabs_type_to_ast(*stabs_member_pointer.member_type.get(), state, absolute_parent_offset_bytes, depth + 1, true, true); - result = std::move(member_pointer); - break; - } - case StabsTypeDescriptor::BUILTIN: { - verify(type.as().type_id == 16, - "Unknown built-in type! Please file a bug report."); - auto builtin = std::make_unique(); - builtin->bclass = BuiltInClass::BOOL_8; - result = std::move(builtin); - break; - } - } - assert(result); - return result; -} - -std::unique_ptr stabs_field_to_ast(const StabsField& field, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth) { - AST_DEBUG_PRINTF("%-*s field %s\n", depth * 4, "", field.name.c_str()); - - if(detect_bitfield(field, state)) { - // Process bitfields. - std::unique_ptr bitfield = std::make_unique(); - bitfield->name = (field.name == " ") ? 
"" : field.name; - bitfield->relative_offset_bytes = field.offset_bits / 8; - bitfield->absolute_offset_bytes = absolute_parent_offset_bytes + bitfield->relative_offset_bytes; - bitfield->size_bits = field.size_bits; - bitfield->underlying_type = stabs_type_to_ast(*field.type, state, bitfield->absolute_offset_bytes, depth + 1, true, false); - bitfield->bitfield_offset_bits = field.offset_bits % 8; - if(field.is_static) { - bitfield->storage_class = SC_STATIC; - } - bitfield->access_specifier = stabs_field_visibility_to_access_specifier(field.visibility); - return bitfield; - } - - // Process a normal field. - s32 relative_offset_bytes = field.offset_bits / 8; - s32 absolute_offset_bytes = absolute_parent_offset_bytes + relative_offset_bytes; - std::unique_ptr child = stabs_type_to_ast(*field.type, state, absolute_offset_bytes, depth + 1, true, false); - child->name = (field.name == " ") ? "" : field.name; - child->relative_offset_bytes = relative_offset_bytes; - child->absolute_offset_bytes = absolute_offset_bytes; - child->size_bits = field.size_bits; - if(field.is_static) { - child->storage_class = SC_STATIC; - } - child->access_specifier = stabs_field_visibility_to_access_specifier(field.visibility); - return child; -} - -static bool detect_bitfield(const StabsField& field, const StabsToAstState& state) { - // Static fields can't be bitfields. - if(field.is_static) { - return false; - } - - // Resolve type references. 
- const StabsType* type = field.type.get(); - for(s32 i = 0; i < 50; i++) { - if(!type->has_body) { - if(type->anonymous) { - return false; - } - auto next_type = state.stabs_types->find(type->type_number); - if(next_type == state.stabs_types->end() || next_type->second == type) { - return false; - } - type = next_type->second; - } else if(type->descriptor == StabsTypeDescriptor::TYPE_REFERENCE) { - type = type->as().type.get(); - } else if(type->descriptor == StabsTypeDescriptor::CONST_QUALIFIER) { - type = type->as().type.get(); - } else if(type->descriptor == StabsTypeDescriptor::VOLATILE_QUALIFIER) { - type = type->as().type.get(); - } else { - break; - } - - // Prevent an infinite loop if there's a cycle (fatal frame). - if(i == 49) { - return false; - } - } - - // Determine the size of the underlying type. - s32 underlying_type_size_bits = 0; - switch(type->descriptor) { - case ccc::StabsTypeDescriptor::RANGE: { - underlying_type_size_bits = builtin_class_size(type->as().range_class) * 8; - break; - } - case ccc::StabsTypeDescriptor::CROSS_REFERENCE: { - if(type->as().type == StabsCrossReferenceType::ENUM) { - underlying_type_size_bits = 32; - } else { - return false; - } - break; - } - case ccc::StabsTypeDescriptor::TYPE_ATTRIBUTE: { - underlying_type_size_bits = type->as().size_bits; - break; - } - case ccc::StabsTypeDescriptor::BUILTIN: { - underlying_type_size_bits = 8; // bool - break; - } - default: { - return false; - } - } - - if(underlying_type_size_bits == 0) { - return false; - } - - return field.size_bits != underlying_type_size_bits; -} - // Some enums have two symbols associated with them: One named " " and another // one referencing the first. 
void remove_duplicate_enums(std::vector>& ast_nodes) { @@ -440,7 +61,7 @@ void TypeDeduplicatorOMatic::process_file(SourceFile& file, s32 file_index, cons node->files = {file_index}; name_to_deduplicated_index[node->name] = deduplicated_nodes_grouped_by_name.size(); deduplicated_nodes_grouped_by_name.emplace_back().emplace_back((s32) flat_nodes.size()); - if(node->stabs_type_number > -1) { + if(node->stabs_type_number.type > -1) { file.stabs_type_number_to_deduplicated_type_index[node->stabs_type_number] = (s32) flat_nodes.size(); } flat_nodes.emplace_back(std::move(node)); @@ -451,8 +72,8 @@ void TypeDeduplicatorOMatic::process_file(SourceFile& file, s32 file_index, cons bool match = false; for(s32 existing_node_index : nodes_with_the_same_name) { std::unique_ptr& existing_node = flat_nodes[existing_node_index]; - assert(existing_node.get()); - assert(node.get()); + CCC_ASSERT(existing_node.get()); + CCC_ASSERT(node.get()); TypeLookupInfo lookup; lookup.files = &files; lookup.nodes = &flat_nodes; @@ -468,7 +89,7 @@ void TypeDeduplicatorOMatic::process_file(SourceFile& file, s32 file_index, cons } else { // The new node matches this existing node. existing_node->files.emplace_back(file_index); - if(node->stabs_type_number > -1) { + if(node->stabs_type_number.type > -1) { file.stabs_type_number_to_deduplicated_type_index[node->stabs_type_number] = existing_node_index; } if(compare_result.type == CompareResultType::MATCHES_FAVOUR_RHS) { @@ -487,7 +108,7 @@ void TypeDeduplicatorOMatic::process_file(SourceFile& file, s32 file_index, cons // that have already been processed. 
node->files = {file_index}; nodes_with_the_same_name.emplace_back((s32) flat_nodes.size()); - if(node->stabs_type_number > -1) { + if(node->stabs_type_number.type > -1) { file.stabs_type_number_to_deduplicated_type_index[node->stabs_type_number] = (s32) flat_nodes.size(); } flat_nodes.emplace_back(std::move(node)); @@ -542,11 +163,11 @@ CompareResult compare_nodes(const Node& node_lhs, const Node& node_rhs, const Ty break; } case DATA: { - verify_not_reached("Tried to compare data AST nodes."); + CCC_FATAL("Tried to compare data AST nodes."); break; } case FUNCTION_DEFINITION: { - verify_not_reached("Tried to compare function definition AST nodes."); + CCC_FATAL("Tried to compare function definition AST nodes."); } case FUNCTION_TYPE: { const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); @@ -567,7 +188,7 @@ CompareResult compare_nodes(const Node& node_lhs, const Node& node_rhs, const Ty break; } case INITIALIZER_LIST: { - verify_not_reached("Tried to compare initializer list AST nodes."); + CCC_FATAL("Tried to compare initializer list AST nodes."); break; } case INLINE_ENUM: { @@ -608,7 +229,7 @@ CompareResult compare_nodes(const Node& node_lhs, const Node& node_rhs, const Ty break; } case SOURCE_FILE: { - verify_not_reached("Tried to compare source file AST nodes."); + CCC_FATAL("Tried to compare source file AST nodes."); } case TYPE_NAME: { const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); @@ -644,7 +265,8 @@ static bool compare_nodes_and_merge(CompareResult& dest, const Node& node_lhs, c // Propagate confusion. dest.type = CompareResultType::MATCHES_CONFUSED; } else if(dest.type == CompareResultType::MATCHES_FAVOUR_LHS && result.type == CompareResultType::MATCHES_FAVOUR_RHS) { - // Propagate confusion. + // One of the results favours the LHS node and the other favours the + // RHS node so we are confused. 
dest.type = CompareResultType::MATCHES_CONFUSED; } else if(dest.type == CompareResultType::MATCHES_FAVOUR_RHS && result.type == CompareResultType::MATCHES_FAVOUR_LHS) { // One of the results favours the LHS node and the other favours the @@ -674,7 +296,7 @@ static void try_to_match_wobbly_typedefs(CompareResult& result, const Node& node for(s32 i = 0; result.type == CompareResultType::DIFFERS && i < 2; i++) { if(type_name_node->descriptor == TYPE_NAME) { const TypeName& type_name = type_name_node->as(); - if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number > -1) { + if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number.type > -1) { const std::unique_ptr& file = lookup.files->at(type_name.referenced_file_index); auto index = file->stabs_type_number_to_deduplicated_type_index.find(type_name.referenced_stabs_type_number); if(index != file->stabs_type_number_to_deduplicated_type_index.end()) { @@ -789,16 +411,4 @@ const char* access_specifier_to_string(AccessSpecifier specifier) { return ""; } -AccessSpecifier stabs_field_visibility_to_access_specifier(StabsFieldVisibility visibility) { - AccessSpecifier access_specifier = AS_PUBLIC; - switch(visibility) { - case ccc::StabsFieldVisibility::NONE: access_specifier = AS_PUBLIC; break; - case ccc::StabsFieldVisibility::PUBLIC: access_specifier = AS_PUBLIC; break; - case ccc::StabsFieldVisibility::PROTECTED: access_specifier = AS_PROTECTED; break; - case ccc::StabsFieldVisibility::PRIVATE: access_specifier = AS_PRIVATE; break; - case ccc::StabsFieldVisibility::PUBLIC_OPTIMIZED_OUT: access_specifier = AS_PUBLIC; break; - } - return access_specifier; -} - } diff --git a/ccc/ast.h b/ccc/ast.h index 2ce0e9c1..9f4e5a9a 100644 --- a/ccc/ast.h +++ b/ccc/ast.h @@ -81,7 +81,7 @@ struct Node { std::vector files; // List of files for which a given top-level type is present. 
const ParsedSymbol* symbol = nullptr; const char* compare_fail_reason = ""; - s64 stabs_type_number = -1; + StabsTypeNumber stabs_type_number; s32 relative_offset_bytes = -1; // Offset relative to start of last inline struct/union. s32 absolute_offset_bytes = -1; // Offset relative to outermost struct/union. @@ -92,14 +92,20 @@ struct Node { virtual ~Node() {} template - SubType& as() { assert(descriptor == SubType::DESCRIPTOR); return *static_cast(this); } + SubType& as() { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } template - const SubType& as() const { assert(descriptor == SubType::DESCRIPTOR); return *static_cast(this); } + const SubType& as() const { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } template static std::pair as(const Node& lhs, const Node& rhs) { - assert(lhs.descriptor == SubType::DESCRIPTOR && rhs.descriptor == SubType::DESCRIPTOR); + CCC_ASSERT(lhs.descriptor == SubType::DESCRIPTOR && rhs.descriptor == SubType::DESCRIPTOR); return std::pair(static_cast(lhs), static_cast(rhs)); } }; @@ -231,7 +237,7 @@ struct SourceFile : Node { std::vector> functions; std::vector> globals; std::vector symbols; - std::map stabs_type_number_to_deduplicated_type_index; + std::map stabs_type_number_to_deduplicated_type_index; SourceFile() : Node(DESCRIPTOR) {} static const constexpr NodeDescriptor DESCRIPTOR = SOURCE_FILE; @@ -248,7 +254,7 @@ struct TypeName : Node { TypeNameSource source = TypeNameSource::ERROR; std::string type_name; s32 referenced_file_index = -1; - s64 referenced_stabs_type_number = -1; + StabsTypeNumber referenced_stabs_type_number; TypeName() : Node(DESCRIPTOR) {} static const constexpr NodeDescriptor DESCRIPTOR = TYPE_NAME; @@ -302,25 +308,17 @@ struct Variable : Node { static const constexpr NodeDescriptor DESCRIPTOR = VARIABLE; }; -struct TypeDeduplicatorOMatic { +class TypeDeduplicatorOMatic { +private: std::vector> flat_nodes; std::vector> 
deduplicated_nodes_grouped_by_name; std::map name_to_deduplicated_index; +public: void process_file(SourceFile& file, s32 file_index, const std::vector>& files); std::vector> finish(); }; - - -struct StabsToAstState { - s32 file_index; - std::map* stabs_types; -}; -std::unique_ptr stabs_type_to_ast_no_throw(const StabsType& type, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute); -std::unique_ptr stabs_symbol_to_ast(const ParsedSymbol& symbol, const StabsToAstState& state); -std::unique_ptr stabs_type_to_ast(const StabsType& type, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute); -std::unique_ptr stabs_field_to_ast(const StabsField& field, const StabsToAstState& state, s32 absolute_parent_offset_bytes, s32 depth); void remove_duplicate_enums(std::vector>& ast_nodes); void remove_duplicate_self_typedefs(std::vector>& ast_nodes); enum class CompareResultType { @@ -374,7 +372,6 @@ const char* node_type_to_string(const Node& node); const char* storage_class_to_string(StorageClass storage_class); const char* global_variable_location_to_string(GlobalVariableLocation location); const char* access_specifier_to_string(AccessSpecifier specifier); -AccessSpecifier stabs_field_visibility_to_access_specifier(StabsFieldVisibility visibility); enum TraversalOrder { PREORDER_TRAVERSAL, diff --git a/ccc/data_refinement.cpp b/ccc/data_refinement.cpp index 8d0ec60f..950a62e4 100644 --- a/ccc/data_refinement.cpp +++ b/ccc/data_refinement.cpp @@ -155,7 +155,7 @@ static std::unique_ptr refine_node(s32 virtual_address, const ast::No } case ast::TYPE_NAME: { const ast::TypeName& type_name = type.as(); - if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number > -1) { + if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number.type > -1) { const ast::SourceFile& source_file = 
*context.high.source_files[type_name.referenced_file_index].get(); auto type_index = source_file.stabs_type_number_to_deduplicated_type_index.find(type_name.referenced_stabs_type_number); if(type_index != source_file.stabs_type_number_to_deduplicated_type_index.end()) { @@ -177,7 +177,7 @@ static std::unique_ptr refine_node(s32 virtual_address, const ast::No } } - verify_not_reached("Failed to refine global variable (%s).", ast::node_type_to_string(type)); + CCC_FATAL("Failed to refine global variable (%s).", ast::node_type_to_string(type)); } static std::unique_ptr refine_builtin(s32 virtual_address, BuiltInClass bclass, const DataRefinementContext& context) { @@ -257,7 +257,7 @@ static std::unique_ptr refine_builtin(s32 virtual_address, BuiltInCla } } - verify(data != nullptr, "Failed to refine builtin."); + CCC_CHECK_FATAL(data != nullptr, "Failed to refine builtin."); return data; } diff --git a/ccc/dependency.cpp b/ccc/dependency.cpp index a90141b8..59d05e1b 100644 --- a/ccc/dependency.cpp +++ b/ccc/dependency.cpp @@ -40,7 +40,7 @@ void map_types_to_files_based_on_this_pointers(HighSymbolTable& high) { ast::Node& class_node = *parameter_type.as().value_type.get(); if(class_node.descriptor == ast::TYPE_NAME) { ast::TypeName& class_type = class_node.as(); - if(class_type.referenced_stabs_type_number > -1) { + if(class_type.referenced_stabs_type_number.type > -1) { const ast::SourceFile& foreign_file = *high.source_files.at(class_type.referenced_file_index).get(); // Lookup the type pointed to by the this pointer. 
auto type_index = foreign_file.stabs_type_number_to_deduplicated_type_index.find(class_type.referenced_stabs_type_number); @@ -91,7 +91,7 @@ static void map_types_to_files_based_on_reference_count_single_pass(HighSymbolTa } case ast::TYPE_NAME: { const ast::TypeName& type_name = node.as(); - if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number > -1) { + if(type_name.referenced_file_index > -1 && type_name.referenced_stabs_type_number.type > -1) { const std::unique_ptr& source_file = high.source_files.at(type_name.referenced_file_index); auto type_index = source_file->stabs_type_number_to_deduplicated_type_index.find(type_name.referenced_stabs_type_number); if(type_index != source_file->stabs_type_number_to_deduplicated_type_index.end() @@ -101,6 +101,7 @@ static void map_types_to_files_based_on_reference_count_single_pass(HighSymbolTa } break; } + default: {} } return ast::EXPLORE_CHILDREN; }; @@ -144,7 +145,7 @@ TypeDependencyAdjacencyList build_type_dependency_graph(const HighSymbolTable& h // Filter out forward declarations. 
if(type_name.source == ast::TypeNameSource::REFERENCE && type_name.referenced_file_index > -1 - && type_name.referenced_stabs_type_number > -1) { + && type_name.referenced_stabs_type_number.type > -1) { const ast::SourceFile& source_file = *high.source_files[type_name.referenced_file_index].get(); auto type_index = source_file.stabs_type_number_to_deduplicated_type_index.find(type_name.referenced_stabs_type_number); if(type_index != source_file.stabs_type_number_to_deduplicated_type_index.end() && type_index->second != i) { diff --git a/ccc/elf.cpp b/ccc/elf.cpp index 550270b3..fcc52aff 100644 --- a/ccc/elf.cpp +++ b/ccc/elf.cpp @@ -1,15 +1,6 @@ #include "elf.h" -namespace ccc::loaders { - -static void parse_elf_file(Module& mod); - -Module read_elf_file(fs::path path) { - Module mod; - mod.image = read_binary_file(path); - parse_elf_file(mod); - return mod; -} +namespace ccc { enum class ElfIdentClass : u8 { B32 = 0x1, @@ -32,7 +23,7 @@ enum class ElfMachine : u16 { MIPS = 0x08 }; -packed_struct(ElfIdentHeader, +CCC_PACKED_STRUCT(ElfIdentHeader, /* 0x0 */ u8 magic[4]; // 7f 45 4c 46 /* 0x4 */ ElfIdentClass e_class; /* 0x5 */ u8 endianess; @@ -42,7 +33,7 @@ packed_struct(ElfIdentHeader, /* 0x9 */ u8 pad[7]; ) -packed_struct(ElfFileHeader32, +CCC_PACKED_STRUCT(ElfFileHeader32, /* 0x10 */ ElfFileType type; /* 0x12 */ ElfMachine machine; /* 0x14 */ u32 version; @@ -58,7 +49,7 @@ packed_struct(ElfFileHeader32, /* 0x32 */ u16 shstrndx; ) -packed_struct(ElfProgramHeader32, +CCC_PACKED_STRUCT(ElfProgramHeader32, /* 0x00 */ u32 type; /* 0x04 */ u32 offset; /* 0x08 */ u32 vaddr; @@ -69,7 +60,7 @@ packed_struct(ElfProgramHeader32, /* 0x1c */ u32 align; ) -packed_struct(ElfSectionHeader32, +CCC_PACKED_STRUCT(ElfSectionHeader32, /* 0x00 */ u32 name; /* 0x04 */ ElfSectionType type; /* 0x08 */ u32 flags; @@ -82,39 +73,49 @@ packed_struct(ElfSectionHeader32, /* 0x24 */ u32 entsize; ) -void parse_elf_file(Module& mod) { - const auto& ident = get_packed(mod.image, 0, "ELF ident 
bytes"); - verify(memcmp(ident.magic, "\x7f\x45\x4c\x46", 4) == 0, "Invalid ELF file."); - verify(ident.e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit)."); +Result parse_elf_file(Module& mod) { + const ElfIdentHeader* ident = get_packed(mod.image, 0); + CCC_CHECK(ident, "ELF ident out of range."); + CCC_CHECK(memcmp(ident->magic, "\x7f\x45\x4c\x46", 4) == 0, "Invalid ELF file."); + CCC_CHECK(ident->e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit)."); - const auto& header = get_packed(mod.image, sizeof(ElfIdentHeader), "ELF file header"); - verify(header.machine == ElfMachine::MIPS, "Wrong architecture."); + const ElfFileHeader32* header = get_packed(mod.image, sizeof(ElfIdentHeader)); + CCC_CHECK(header, "ELF file header out of range."); + CCC_CHECK(header->machine == ElfMachine::MIPS, "Wrong architecture."); - for(u32 i = 0; i < header.phnum; i++) { - u64 header_offset = header.phoff + i * sizeof(ElfProgramHeader32); - const auto& program_header = get_packed(mod.image, header_offset, "ELF program header"); + for(u32 i = 0; i < header->phnum; i++) { + u64 header_offset = header->phoff + i * sizeof(ElfProgramHeader32); + const ElfProgramHeader32* program_header = get_packed(mod.image, header_offset); + CCC_CHECK(program_header, "ELF program header out of range."); + ModuleSegment& segment = mod.segments.emplace_back(); - segment.file_offset = program_header.offset; - segment.size = program_header.filesz; - segment.virtual_address = program_header.vaddr; + segment.file_offset = program_header->offset; + segment.size = program_header->filesz; + segment.virtual_address = program_header->vaddr; } - for(u32 i = 0; i < header.shnum; i++) { - u64 header_offset = header.shoff + i * sizeof(ElfSectionHeader32); - const auto& section_header = get_packed(mod.image, header_offset, "ELF section header"); + for(u32 i = 0; i < header->shnum; i++) { + u64 header_offset = header->shoff + i * sizeof(ElfSectionHeader32); + const auto& section_header = 
get_packed(mod.image, header_offset); + CCC_CHECK(section_header, "ELF section header out of range."); + ModuleSection& section = mod.sections.emplace_back(); - section.file_offset = section_header.offset; - section.size = section_header.size; - section.type = section_header.type; - section.name_offset = section_header.name; - section.virtual_address = section_header.addr; + section.file_offset = section_header->offset; + section.size = section_header->size; + section.type = section_header->type; + section.name_offset = section_header->name; + section.virtual_address = section_header->addr; } - if(header.shstrndx < mod.sections.size()) { + if(header->shstrndx < mod.sections.size()) { for(ModuleSection& section : mod.sections) { - section.name = get_string(mod.image, mod.sections[header.shstrndx].file_offset + section.name_offset); + Result name = get_string(mod.image, mod.sections[header->shstrndx].file_offset + section.name_offset); + CCC_CHECK(name.success(), "Section name out of bounds."); + section.name = *name; } } + + return Result(); } } diff --git a/ccc/elf.h b/ccc/elf.h index b3f3a738..045ff6e2 100644 --- a/ccc/elf.h +++ b/ccc/elf.h @@ -4,9 +4,9 @@ #include "util.h" #include "module.h" -namespace ccc::loaders { +namespace ccc { -Module read_elf_file(fs::path path); +Result parse_elf_file(Module& mod); } diff --git a/ccc/insn.cpp b/ccc/insn.cpp index 5fdca794..39fad1b2 100644 --- a/ccc/insn.cpp +++ b/ccc/insn.cpp @@ -75,7 +75,7 @@ const InsnInfo& Insn::info() const { case INSN_CLASS_COP1_S: return COP1_S_TABLE[func()]; case INSN_CLASS_COP1_W: return COP1_W_TABLE[func()]; case INSN_CLASS_COP2: return MIPS_OPCODE_TABLE[OPCODE_COP2]; - default: verify_not_reached("Invalid instruction %08x.", value); + default: CCC_FATAL("Invalid instruction %08x.", value); } } diff --git a/ccc/mdebug.cpp b/ccc/mdebug.cpp index 741a0c8b..c2730637 100644 --- a/ccc/mdebug.cpp +++ b/ccc/mdebug.cpp @@ -2,7 +2,7 @@ namespace ccc::mdebug { -packed_struct(SymbolicHeader, 
+CCC_PACKED_STRUCT(SymbolicHeader, /* 0x00 */ s16 magic; /* 0x02 */ s16 version_stamp; /* 0x04 */ s32 line_number_count; @@ -30,7 +30,7 @@ packed_struct(SymbolicHeader, /* 0x5c */ s32 external_symbols_offset; ) -packed_struct(FileDescriptor, +CCC_PACKED_STRUCT(FileDescriptor, /* 0x00 */ u32 address; /* 0x04 */ s32 file_path_string_offset; /* 0x08 */ s32 strings_offset; @@ -57,7 +57,7 @@ packed_struct(FileDescriptor, ) static_assert(sizeof(FileDescriptor) == 0x48); -packed_struct(ProcedureDescriptor, +CCC_PACKED_STRUCT(ProcedureDescriptor, /* 0x00 */ u32 address; /* 0x04 */ s32 isym; /* 0x08 */ s32 iline; @@ -74,7 +74,7 @@ packed_struct(ProcedureDescriptor, /* 0x30 */ s32 cb_line_offset; ) -packed_struct(SymbolHeader, +CCC_PACKED_STRUCT(SymbolHeader, /* 0x0 */ u32 iss; /* 0x4 */ s32 value; /* 0x8:00 */ u32 st : 6; @@ -84,33 +84,40 @@ packed_struct(SymbolHeader, ) static_assert(sizeof(SymbolHeader) == 0xc); -packed_struct(ExternalSymbolHeader, +CCC_PACKED_STRUCT(ExternalSymbolHeader, /* 0x0 */ u16 flags; /* 0x2 */ s16 ifd; /* 0x4 */ SymbolHeader symbol; ) -static s32 get_corruption_fixing_fudge_offset(const Module& mod, const ModuleSection& section, const SymbolicHeader& hdrr); -static Symbol parse_symbol(const SymbolHeader& header, const std::vector& image, s32 strings_offset); +static s32 get_corruption_fixing_fudge_offset(u32 section_offset, const SymbolicHeader& hdrr); +static Result parse_symbol(const SymbolHeader& header, const std::vector& elf, s32 strings_offset); -SymbolTable parse_symbol_table(const Module& mod, const ModuleSection& section) { +Result parse_symbol_table(const std::vector& elf, u32 section_offset) { SymbolTable symbol_table; - const auto& hdrr = get_packed(mod.image, section.file_offset, "MIPS debug section"); - verify(hdrr.magic == 0x7009, "Invalid symbolic header."); - symbol_table.header = &hdrr; + const SymbolicHeader* hdrr = get_packed(elf, section_offset); + CCC_CHECK(hdrr != nullptr, "MIPS_DEBUG section header out of bounds."); + 
CCC_CHECK(hdrr->magic == 0x7009, "Invalid symbolic header."); - s32 fudge_offset = get_corruption_fixing_fudge_offset(mod, section, hdrr); + symbol_table.header = hdrr; + + s32 fudge_offset = get_corruption_fixing_fudge_offset(section_offset, *hdrr); // Iterate over file descriptors. - for(s64 i = 0; i < hdrr.file_descriptor_count; i++) { - u64 fd_offset = hdrr.file_descriptors_offset + i * sizeof(FileDescriptor); - const auto& fd_header = get_packed(mod.image, fd_offset + fudge_offset, "file descriptor"); - verify(fd_header.f_big_endian == 0, "Not little endian or bad file descriptor table."); + for(s64 i = 0; i < hdrr->file_descriptor_count; i++) { + u64 fd_offset = hdrr->file_descriptors_offset + i * sizeof(FileDescriptor); + const FileDescriptor* fd_header = get_packed(elf, fd_offset + fudge_offset); + CCC_CHECK(fd_header != nullptr, "MIPS_DEBUG file descriptor out of bounds."); + CCC_CHECK(fd_header->f_big_endian == 0, "Not little endian or bad file descriptor table."); + + SymFileDescriptor& fd = symbol_table.files.emplace_back(); + fd.header = fd_header; - SymFileDescriptor fd; - fd.header = &fd_header; - fd.raw_path = get_string(mod.image, hdrr.local_strings_offset + fd_header.strings_offset + fd_header.file_path_string_offset + fudge_offset); + s32 raw_path_offset = hdrr->local_strings_offset + fd_header->strings_offset + fd_header->file_path_string_offset + fudge_offset; + Result raw_path = get_string(elf, raw_path_offset); + CCC_RETURN_IF_ERROR(raw_path); + fd.raw_path = *raw_path; // Try to detect the source language. std::string lower_name = fd.raw_path; @@ -124,49 +131,42 @@ SymbolTable parse_symbol_table(const Module& mod, const ModuleSection& section) } // Parse local symbols. 
- for(s64 j = 0; j < fd_header.symbol_count; j++) { - u64 sym_offset = hdrr.local_symbols_offset + (fd_header.isym_base + j) * sizeof(SymbolHeader); - const auto& symbol_header = get_packed(mod.image, sym_offset + fudge_offset, "local symbol"); - Symbol& sym = fd.symbols.emplace_back(parse_symbol(symbol_header, mod.image, hdrr.local_strings_offset + fd_header.strings_offset + fudge_offset)); + for(s64 j = 0; j < fd_header->symbol_count; j++) { + u64 sym_offset = hdrr->local_symbols_offset + (fd_header->isym_base + j) * sizeof(SymbolHeader); + const SymbolHeader* symbol_header = get_packed(elf, sym_offset + fudge_offset); + CCC_CHECK(symbol_header != nullptr, "Symbol header out of bounds."); - if(fd.base_path.empty() && symbol_header.iss == fd_header.file_path_string_offset && sym.is_stabs && sym.code == N_SO && fd.symbols.size() > 2) { - const Symbol& base_path = fd.symbols[fd.symbols.size() - 2]; + s32 strings_offset = hdrr->local_strings_offset + fd_header->strings_offset + fudge_offset; + Result sym = parse_symbol(*symbol_header, elf, strings_offset); + CCC_RETURN_IF_ERROR(sym); + + if(fd.base_path.empty() && symbol_header->iss == fd_header->file_path_string_offset && sym->is_stabs && sym->code == N_SO && fd.symbols.size() > 2) { + const Symbol& base_path = fd.symbols.back(); if(base_path.is_stabs && base_path.code == N_SO) { fd.base_path = base_path.string; } } + + fd.symbols.emplace_back(std::move(*sym)); } fd.full_path = merge_paths(fd.base_path, fd.raw_path); - - // Parse procedure descriptors. - // This is buggy. 
- //for(s64 j = 0; j < fd_header.cpd; j++) { - // u64 pd_offset = hdrr.cb_pd_offset + (fd_header.ipd_first + j) * sizeof(ProcedureDescriptor); - // auto pd_entry = get_packed(mod.image, pd_offset + fudge_offset, "procedure descriptor"); - // - // u64 sym_offset = hdrr.cb_sym_offset + (fd_header.isym_base + pd_entry.isym) * sizeof(SymbolHeader); - // const auto& external_header = get_packed(mod.image, sym_offset + fudge_offset, "local symbol"); - // - // SymProcedureDescriptor& pd = fd.procedures.emplace_back(); - // pd.name = get_string(mod.image, hdrr.strings_base_offset + fd_header.strings_offset + external_header.iss + fudge_offset); - // pd.address = pd_entry.address; - //} - - symbol_table.files.emplace_back(fd); } // Parse external symbols. - for(s64 i = 0; i < hdrr.external_symbols_count; i++) { - u64 sym_offset = hdrr.external_symbols_offset + i * sizeof(ExternalSymbolHeader); - const auto& external_header = get_packed(mod.image, sym_offset + fudge_offset, "local symbol"); - symbol_table.externals.emplace_back(parse_symbol(external_header.symbol, mod.image, hdrr.external_strings_offset + fudge_offset)); + for(s64 i = 0; i < hdrr->external_symbols_count; i++) { + u64 sym_offset = hdrr->external_symbols_offset + i * sizeof(ExternalSymbolHeader); + const ExternalSymbolHeader* external_header = get_packed(elf, sym_offset + fudge_offset); + CCC_CHECK(external_header != nullptr, "External header out of bounds."); + Result sym = parse_symbol(external_header->symbol, elf, hdrr->external_strings_offset + fudge_offset); + CCC_RETURN_IF_ERROR(sym); + symbol_table.externals.emplace_back(std::move(*sym)); } return symbol_table; } -static s32 get_corruption_fixing_fudge_offset(const Module& mod, const ModuleSection& section, const SymbolicHeader& hdrr) { +static s32 get_corruption_fixing_fudge_offset(u32 section_offset, const SymbolicHeader& hdrr) { // If the .mdebug section was moved without updating its contents all the // absolute file offsets stored within will be 
incorrect by a fixed amount. @@ -184,7 +184,7 @@ static s32 get_corruption_fixing_fudge_offset(const Module& mod, const ModuleSec if(hdrr.relative_file_descriptors_offset > 0) right_after_header = std::min(hdrr.relative_file_descriptors_offset, right_after_header); if(hdrr.external_symbols_offset > 0) right_after_header = std::min(hdrr.external_symbols_offset, right_after_header); - if(right_after_header == section.file_offset + sizeof(SymbolicHeader)) { + if(right_after_header == section_offset + sizeof(SymbolicHeader)) { return 0; // It's probably fine. } @@ -194,16 +194,20 @@ static s32 get_corruption_fixing_fudge_offset(const Module& mod, const ModuleSec } // Try to fix it. - s32 fudge_offset = section.file_offset - (right_after_header - sizeof(SymbolicHeader)); + s32 fudge_offset = section_offset - (right_after_header - sizeof(SymbolicHeader)); if(fudge_offset != 0) { - warn("The .mdebug section is probably corrupted, but I can try to fix it for you (fudge offset %d).", fudge_offset); + CCC_WARN("The .mdebug section is probably corrupted, but I can try to fix it for you (fudge offset %d).", fudge_offset); } return fudge_offset; } -static Symbol parse_symbol(const SymbolHeader& header, const std::vector& image, s32 strings_offset) { +static Result parse_symbol(const SymbolHeader& header, const std::vector& elf, s32 strings_offset) { Symbol symbol; - symbol.string = get_c_string(image, strings_offset + header.iss); + + Result string = get_string(elf, strings_offset + header.iss); + CCC_RETURN_IF_ERROR(string); + symbol.string = *string; + symbol.value = header.value; symbol.storage_type = (SymbolType) header.st; symbol.storage_class = (SymbolClass) header.sc; @@ -211,7 +215,7 @@ static Symbol parse_symbol(const SymbolHeader& header, const std::vector& im if((symbol.index & 0xfff00) == 0x8f300) { symbol.is_stabs = true; symbol.code = (StabsCode) (symbol.index - 0x8f300); - verify(stabs_code(symbol.code) != nullptr, "Bad STABS symbol code '%x'. 
Please file a bug report!", symbol.code); + CCC_CHECK(stabs_code(symbol.code) != nullptr, "Bad stabs symbol code '%x'.", symbol.code); } else { symbol.is_stabs = false; } diff --git a/ccc/mdebug.h b/ccc/mdebug.h index 5002925d..f4ffa48d 100644 --- a/ccc/mdebug.h +++ b/ccc/mdebug.h @@ -2,7 +2,6 @@ #define _CCC_MDEBUG_H #include "util.h" -#include "module.h" namespace ccc::mdebug { @@ -141,7 +140,7 @@ struct SymbolTable { std::vector externals; }; -SymbolTable parse_symbol_table(const Module& module, const ModuleSection& section); +Result parse_symbol_table(const std::vector& elf, u32 section_offset); void print_headers(FILE* dest, const SymbolTable& symbol_table); const char* symbol_type(SymbolType type); const char* symbol_class(SymbolClass symbol_class); diff --git a/ccc/module.cpp b/ccc/module.cpp index 2782a476..8118de3e 100644 --- a/ccc/module.cpp +++ b/ccc/module.cpp @@ -11,16 +11,16 @@ ModuleSection* Module::lookup_section(const char* name) { return nullptr; } -u32 Module::file_offset_to_virtual_address(u32 file_offset) { +std::optional Module::file_offset_to_virtual_address(u32 file_offset) { for(ModuleSegment& segment : segments) { if(file_offset >= segment.file_offset && file_offset < segment.file_offset + segment.size) { return segment.virtual_address + file_offset - segment.file_offset; } } - verify_not_reached("Failed to translate file offset to virtual address."); + return std::nullopt; } -void read_virtual(u8* dest, u32 address, u32 size, const std::vector& modules) { +Result read_virtual(u8* dest, u32 address, u32 size, const std::vector& modules) { while(size > 0) { bool mapped = false; @@ -29,7 +29,7 @@ void read_virtual(u8* dest, u32 address, u32 size, const std::vector& m if(address >= segment.virtual_address && address < segment.virtual_address + segment.size) { u32 offset = address - segment.virtual_address; u32 copy_size = std::min(segment.size - offset, size); - verify(segment.file_offset + offset + copy_size <= module->image.size(), "Segment 
is bad or image is too small."); + CCC_CHECK(segment.file_offset + offset + copy_size <= module->image.size(), "Segment is bad or image is too small."); memcpy(dest, &module->image[segment.file_offset + offset], copy_size); dest += copy_size; address += copy_size; @@ -39,8 +39,9 @@ void read_virtual(u8* dest, u32 address, u32 size, const std::vector& m } } - verify(mapped, "Tried to read from memory that wouldn't have come from any of the loaded modules."); + CCC_CHECK(mapped, "Tried to read from memory that wouldn't have come from any of the loaded modules"); } + return Result(); } } diff --git a/ccc/module.h b/ccc/module.h index fdb306f9..bdc781f7 100644 --- a/ccc/module.h +++ b/ccc/module.h @@ -49,10 +49,10 @@ struct Module { std::vector segments; ModuleSection* lookup_section(const char* name); - u32 file_offset_to_virtual_address(u32 file_offset); + std::optional file_offset_to_virtual_address(u32 file_offset); }; -void read_virtual(u8* dest, u32 address, u32 size, const std::vector& modules); +Result read_virtual(u8* dest, u32 address, u32 size, const std::vector& modules); template std::vector read_virtual_vector(u32 address, u32 count, const std::vector& modules) { diff --git a/ccc/print_cpp.cpp b/ccc/print_cpp.cpp index 8f8a4f85..4247ead1 100644 --- a/ccc/print_cpp.cpp +++ b/ccc/print_cpp.cpp @@ -16,7 +16,7 @@ static void print_cpp_variable_name(FILE* out, VariableName& name, u32 flags); static void print_cpp_offset(FILE* out, const ast::Node& node, const CppPrinter& printer); static void indent(FILE* out, s32 level); -void CppPrinter::comment_block_beginning(const fs::path& input_file) { +void CppPrinter::comment_block_beginning(const char* input_file) { if(has_anything_been_printed) { fprintf(out, "\n"); } @@ -29,7 +29,7 @@ void CppPrinter::comment_block_beginning(const fs::path& input_file) { } fprintf(out, "\n// \n"); fprintf(out, "// Input file:\n"); - fprintf(out, "// %s\n", input_file.filename().string().c_str()); + fprintf(out, "// %s\n", 
input_file); last_wants_spacing = true; has_anything_been_printed = true; @@ -239,14 +239,14 @@ void CppPrinter::ast_node(const ast::Node& node, VariableName& parent_name, s32 switch(node.descriptor) { case ast::ARRAY: { const ast::Array& array = node.as(); - assert(array.element_type.get()); + CCC_ASSERT(array.element_type.get()); name.array_indices.emplace_back(array.element_count); ast_node(*array.element_type.get(), name, indentation_level); break; } case ast::BITFIELD: { const ast::BitField& bit_field = node.as(); - assert(bit_field.underlying_type.get()); + CCC_ASSERT(bit_field.underlying_type.get()); ast_node(*bit_field.underlying_type.get(), name, indentation_level); fprintf(out, " : %d", bit_field.size_bits); break; @@ -414,7 +414,7 @@ void CppPrinter::ast_node(const ast::Node& node, VariableName& parent_name, s32 fprintf(out, " : "); for(size_t i = 0; i < struct_or_union.base_classes.size(); i++) { ast::Node& base_class = *struct_or_union.base_classes[i].get(); - assert(base_class.descriptor == ast::TypeName::DESCRIPTOR); + CCC_ASSERT(base_class.descriptor == ast::TypeName::DESCRIPTOR); print_cpp_offset(out, base_class, *this); if(base_class.access_specifier != ast::AS_PUBLIC) { fprintf(out, "%s ", ast::access_specifier_to_string((ast::AccessSpecifier) base_class.access_specifier)); @@ -435,7 +435,7 @@ void CppPrinter::ast_node(const ast::Node& node, VariableName& parent_name, s32 // Print fields. 
for(const std::unique_ptr& field : struct_or_union.fields) { - assert(field.get()); + CCC_ASSERT(field.get()); if(access_specifier != field->access_specifier) { indent(out, indentation_level); fprintf(out, "%s:\n", ast::access_specifier_to_string((ast::AccessSpecifier) field->access_specifier)); @@ -473,7 +473,7 @@ void CppPrinter::ast_node(const ast::Node& node, VariableName& parent_name, s32 } case ast::POINTER: { const ast::Pointer& pointer = node.as(); - assert(pointer.value_type.get()); + CCC_ASSERT(pointer.value_type.get()); name.pointer_chars.emplace_back('*'); ast_node(*pointer.value_type.get(), name, indentation_level); print_cpp_variable_name(out, name, INSERT_SPACE_TO_LEFT); @@ -493,7 +493,7 @@ void CppPrinter::ast_node(const ast::Node& node, VariableName& parent_name, s32 } case ast::REFERENCE: { const ast::Reference& reference = node.as(); - assert(reference.value_type.get()); + CCC_ASSERT(reference.value_type.get()); name.pointer_chars.emplace_back('&'); ast_node(*reference.value_type.get(), name, indentation_level); print_cpp_variable_name(out, name, INSERT_SPACE_TO_LEFT); @@ -569,7 +569,7 @@ static void print_cpp_variable_name(FILE* out, VariableName& name, u32 flags) { static void print_cpp_offset(FILE* out, const ast::Node& node, const CppPrinter& printer) { if(printer.print_offsets_and_sizes && node.storage_class != ast::SC_STATIC && node.absolute_offset_bytes > -1) { - assert(printer.digits_for_offset > -1 && printer.digits_for_offset < 100); + CCC_ASSERT(printer.digits_for_offset > -1 && printer.digits_for_offset < 100); fprintf(out, "/* 0x%0*x", printer.digits_for_offset, node.absolute_offset_bytes); if(node.descriptor == ast::BITFIELD) { fprintf(out, ":%d", node.as().bitfield_offset_bits); @@ -588,7 +588,7 @@ void CppPrinter::print_variable_storage_comment(const ast::VariableStorage& stor } } else if(storage.type == ast::VariableStorageType::REGISTER) { const char** name_table = mips::REGISTER_STRING_TABLES[(s32) storage.register_class]; - 
assert(storage.register_index_relative < mips::REGISTER_STRING_TABLE_SIZES[(s32) storage.register_class]); + CCC_ASSERT(storage.register_index_relative < mips::REGISTER_STRING_TABLE_SIZES[(s32) storage.register_class]); const char* register_name = name_table[storage.register_index_relative]; fprintf(out, "%s %d", register_name, storage.dbx_register_number); } else { diff --git a/ccc/print_cpp.h b/ccc/print_cpp.h index b668d282..5c7f4bd3 100644 --- a/ccc/print_cpp.h +++ b/ccc/print_cpp.h @@ -31,7 +31,7 @@ struct CppPrinter { CppPrinter(FILE* o) : out(o) {} - void comment_block_beginning(const fs::path& input_file); + void comment_block_beginning(const char* input_file); void comment_block_compiler_version_info(const mdebug::SymbolTable& symbol_table); void comment_block_builtin_types(const std::vector>& ast_nodes); void comment_block_file(const char* path); diff --git a/ccc/print_json.cpp b/ccc/print_json.cpp index 887a619d..28760b46 100644 --- a/ccc/print_json.cpp +++ b/ccc/print_json.cpp @@ -25,6 +25,7 @@ struct JsonPrinter { static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr); static void print_json_variable_storage(JsonPrinter& json, const ast::VariableStorage& storage); +static s64 merge_stabs_type_number_parts(const StabsTypeNumber& number); void print_json(FILE* out, const HighSymbolTable& high, bool print_per_file_types) { JsonPrinter json; @@ -54,7 +55,7 @@ void print_json(FILE* out, const HighSymbolTable& high, bool print_per_file_type } static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { - assert(ptr); + CCC_ASSERT(ptr); const ast::Node& node = *ptr; json.begin_object(); json.string_property("descriptor", ast::node_type_to_string(node)); @@ -85,8 +86,8 @@ static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { if(node.conflict) { json.boolean_property("conflict", true); } - if(node.stabs_type_number != -1) { - json.number_property("stabs_type_number", node.stabs_type_number); + 
if(node.stabs_type_number.type != -1) { + json.number_property("stabs_type_number", merge_stabs_type_number_parts(node.stabs_type_number)); } if(!node.files.empty()) { json.property("files"); @@ -117,7 +118,7 @@ static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { break; } case ast::DATA: { - verify_not_reached("Tried to print a data node as JSON (which is not supported)!"); + CCC_FATAL("Tried to print a data node as JSON (which is not supported)!"); break; } case ast::FUNCTION_DEFINITION: { @@ -186,7 +187,7 @@ static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { break; } case ast::INITIALIZER_LIST: { - verify_not_reached("Tried to print an initializer list node as JSON (which is not supported)!"); + CCC_FATAL("Tried to print an initializer list node as JSON (which is not supported)!"); break; } case ast::INLINE_ENUM: { @@ -272,7 +273,7 @@ static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { json.property("stabs_type_number_to_deduplicated_type_index"); json.begin_object(); for(const auto [stabs_type_number, deduplicated_type_index] : source_file.stabs_type_number_to_deduplicated_type_index) { - json.number_property(stringf("%d", stabs_type_number).c_str(), deduplicated_type_index); + json.number_property(stringf("%lld", merge_stabs_type_number_parts(stabs_type_number)).c_str(), deduplicated_type_index); } json.end_object(); break; @@ -291,8 +292,8 @@ static void print_json_ast_node(JsonPrinter& json, const ast::Node* ptr) { if(type_name.referenced_file_index > -1) { json.number_property("referenced_file_index", type_name.referenced_file_index); } - if(type_name.referenced_stabs_type_number > -1) { - json.number_property("referenced_stabs_type_number", type_name.referenced_stabs_type_number); + if(type_name.referenced_stabs_type_number.type > -1) { + json.number_property("referenced_stabs_type_number", merge_stabs_type_number_parts(type_name.referenced_stabs_type_number)); } break; } @@ -346,6 
+347,14 @@ static void print_json_variable_storage(JsonPrinter& json, const ast::VariableSt json.end_object(); } +static s64 merge_stabs_type_number_parts(const StabsTypeNumber& number) { + if(number.file > -1) { + return number.type | (s64) number.file << 32; + } else { + return number.type; + } +} + void JsonPrinter::begin_object() { if(needs_comma) { fprintf(out, ","); diff --git a/ccc/registers.cpp b/ccc/registers.cpp index 6abe9d9d..65c23dcb 100644 --- a/ccc/registers.cpp +++ b/ccc/registers.cpp @@ -14,12 +14,12 @@ const char** REGISTER_STRING_TABLES[7] = { const u64 REGISTER_STRING_TABLE_SIZES[7] = { 1, - ARRAY_SIZE(GPR_STRINGS), - ARRAY_SIZE(SPECIAL_GPR_STRINGS), - ARRAY_SIZE(SCP_STRINGS), - ARRAY_SIZE(FPR_STRINGS), - ARRAY_SIZE(SPECIAL_FPU_STRINGS), - ARRAY_SIZE(VU0_STRINGS) + CCC_ARRAY_SIZE(GPR_STRINGS), + CCC_ARRAY_SIZE(SPECIAL_GPR_STRINGS), + CCC_ARRAY_SIZE(SCP_STRINGS), + CCC_ARRAY_SIZE(FPR_STRINGS), + CCC_ARRAY_SIZE(SPECIAL_FPU_STRINGS), + CCC_ARRAY_SIZE(VU0_STRINGS) }; const char* REGISTER_CLASSES[7] = { diff --git a/ccc/stabs.cpp b/ccc/stabs.cpp index 835c9976..48b73723 100644 --- a/ccc/stabs.cpp +++ b/ccc/stabs.cpp @@ -1,49 +1,52 @@ #include "stabs.h" -#include -#include - namespace ccc { #define STABS_DEBUG(...) //__VA_ARGS__ #define STABS_DEBUG_PRINTF(...) 
STABS_DEBUG(printf(__VA_ARGS__);) -static std::vector parse_field_list(const char*& input); -static std::vector parse_member_functions(const char*& input); +static Result> parse_field_list(const char*& input); +static Result> parse_member_functions(const char*& input); static BuiltInClass classify_range(const std::string& low, const std::string& high); static void print_field(const StabsField& field); -std::unique_ptr parse_stabs_type(const char*& input) { +Result> parse_stabs_type(const char*& input) { StabsTypeInfo info; - verify(*input != '\0', ERR_END_OF_SYMBOL); + CCC_CHECK(*input != '\0', "Unexpected end of input."); if(*input == '(') { - // Certain compiler versions provide two numbers surrounded in brackets - // instead of a single number. This isn't too common, so here we use a - // hack to deal with this case. - static bool warned_rich_type_numbers = false; - if(!warned_rich_type_numbers) { - warn( - "This file has rich type numbers, which are not handled well by " - "ccc currently. If you are getting this message for a file you " - "care about, open an issue. The included test file will trigger " - "this warning as it was built using the old homebrew toolchain."); - warned_rich_type_numbers = true; - } + // This file has type numbers made up of two pieces: an include file + // index and a type number. 
+ input++; - s64 file_number = eat_s64_literal(input); - expect_char(input, ',', "weird type number"); - s64 type_number = eat_s64_literal(input); - expect_char(input, ')', "weird type number"); + + std::optional file_number = eat_s32_literal(input); + CCC_CHECK(file_number.has_value(), "Cannot parse file number."); + + CCC_EXPECT_CHAR(input, ',', "Weird type number."); + + std::optional type_number = eat_s32_literal(input); + CCC_CHECK(type_number.has_value(), "Cannot parse type number."); + + CCC_EXPECT_CHAR(input, ')', "Weird type number."); + info.anonymous = false; - info.type_number = type_number | (file_number << 32); + info.type_number.file = *file_number; + info.type_number.type = *type_number; if(*input != '=') { info.has_body = false; return std::make_unique(info); } input++; } else if(*input >= '0' && *input <= '9') { + // This file has type numbers which are just a single number. This is + // the more common case for games. + info.anonymous = false; - info.type_number = eat_s64_literal(input); + + std::optional type_number = eat_s32_literal(input); + CCC_CHECK(type_number.has_value(), "Cannot parse type number."); + info.type_number.type = *type_number; + if(*input != '=') { info.has_body = false; return std::make_unique(info); @@ -53,211 +56,340 @@ std::unique_ptr parse_stabs_type(const char*& input) { info.anonymous = true; } info.has_body = true; - verify(*input != '\0', ERR_END_OF_SYMBOL); + + CCC_CHECK(*input != '\0', "Unexpected end of input."); StabsTypeDescriptor descriptor; if((*input >= '0' && *input <= '9') || *input == '(') { descriptor = StabsTypeDescriptor::TYPE_REFERENCE; } else { - descriptor = (StabsTypeDescriptor) eat_char(input); + std::optional descriptor_char = eat_char(input); + CCC_CHECK(descriptor_char.has_value(), "Cannot parse type descriptor."); + descriptor = (StabsTypeDescriptor) *descriptor_char; } - std::unique_ptr type; + std::unique_ptr out_type; switch(descriptor) { case StabsTypeDescriptor::TYPE_REFERENCE: { // 0..9 
auto type_reference = std::make_unique(info); - type_reference->type = parse_stabs_type(input); - type = std::move(type_reference); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_reference->type = std::move(*type); + + out_type = std::move(type_reference); break; } case StabsTypeDescriptor::ARRAY: { // a auto array = std::make_unique(info); - array->index_type = parse_stabs_type(input); - array->element_type = parse_stabs_type(input); - type = std::move(array); + + auto index_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(index_type); + array->index_type = std::move(*index_type); + + auto element_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(element_type); + array->element_type = std::move(*element_type); + + out_type = std::move(array); break; } case StabsTypeDescriptor::ENUM: { // e auto enum_type = std::make_unique(info); STABS_DEBUG_PRINTF("enum {\n"); while(*input != ';') { - std::string name = eat_dodgy_stabs_identifier(input); - expect_char(input, ':', "identifier"); - s64 value = eat_s64_literal(input); - enum_type->fields.emplace_back(value, name); - verify(eat_char(input) == ',', - "Expecting ',' while parsing enum, got '%c' (%02hhx)", - *input, *input); + std::optional name = eat_dodgy_stabs_identifier(input); + CCC_CHECK(name.has_value(), "Cannot parse enum field name."); + + CCC_EXPECT_CHAR(input, ':', "enum"); + + std::optional value = eat_s32_literal(input); + CCC_CHECK(value.has_value(), "Cannot parse enum value."); + + enum_type->fields.emplace_back(*value, std::move(*name)); + + CCC_EXPECT_CHAR(input, ',', "enum"); } input++; STABS_DEBUG_PRINTF("}\n"); - type = std::move(enum_type); + out_type = std::move(enum_type); break; } case StabsTypeDescriptor::FUNCTION: { // f auto function = std::make_unique(info); - function->return_type = parse_stabs_type(input); - type = std::move(function); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + function->return_type = 
std::move(*return_type); + + out_type = std::move(function); break; } case StabsTypeDescriptor::VOLATILE_QUALIFIER: { // k auto volatile_qualifier = std::make_unique(info); - volatile_qualifier->type = parse_stabs_type(input); - type = std::move(volatile_qualifier); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + volatile_qualifier->type = std::move(*type); + + out_type = std::move(volatile_qualifier); break; } case StabsTypeDescriptor::CONST_QUALIFIER: { // k auto const_qualifier = std::make_unique(info); - const_qualifier->type = parse_stabs_type(input); - type = std::move(const_qualifier); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + const_qualifier->type = std::move(*type); + + out_type = std::move(const_qualifier); break; } case StabsTypeDescriptor::RANGE: { // r auto range = std::make_unique(info); - range->type = parse_stabs_type(input); - expect_char(input, ';', "range type descriptor"); - std::string low = eat_dodgy_stabs_identifier(input); - expect_char(input, ';', "low range value"); - std::string high = eat_dodgy_stabs_identifier(input); - expect_char(input, ';', "high range value"); - range->low_maybe_wrong = strtoll(low.c_str(), nullptr, 10); - range->high_maybe_wrong = strtoll(high.c_str(), nullptr, 10); - range->range_class = classify_range(low, high); - type = std::move(range); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + range->type = std::move(*type); + + CCC_EXPECT_CHAR(input, ';', "range type descriptor"); + + std::optional low = eat_dodgy_stabs_identifier(input); + CCC_CHECK(low.has_value(), "Cannot parse low part of range."); + CCC_EXPECT_CHAR(input, ';', "low range value"); + + std::optional high = eat_dodgy_stabs_identifier(input); + CCC_CHECK(high.has_value(), "Cannot parse high part of range."); + CCC_EXPECT_CHAR(input, ';', "high range value"); + + range->low_maybe_wrong = strtoll(low->c_str(), nullptr, 10); + range->high_maybe_wrong = strtoll(high->c_str(), 
nullptr, 10); + range->range_class = classify_range(*low, *high); + out_type = std::move(range); break; } case StabsTypeDescriptor::STRUCT: { // s auto struct_type = std::make_unique(info); STABS_DEBUG_PRINTF("struct {\n"); - struct_type->size = eat_s64_literal(input); + + std::optional struct_size = eat_s64_literal(input); + CCC_CHECK(struct_size.has_value(), "Cannot parse struct size."); + struct_type->size = *struct_size; + if(*input == '!') { input++; - s64 base_class_count = eat_s64_literal(input); - expect_char(input, ',', "base class section"); - for(s64 i = 0; i < base_class_count; i++) { + std::optional base_class_count = eat_s32_literal(input); + CCC_CHECK(base_class_count.has_value(), "Cannot parse base class count."); + CCC_EXPECT_CHAR(input, ',', "base class section"); + for(s64 i = 0; i < *base_class_count; i++) { StabsBaseClass base_class; eat_char(input); - base_class.visibility = (StabsFieldVisibility) eat_char(input); - base_class.offset = eat_s64_literal(input); - expect_char(input, ',', "base class section"); - base_class.type = parse_stabs_type(input); - expect_char(input, ';', "base class section"); + + std::optional visibility = eat_char(input); + CCC_CHECK(visibility.has_value(), "Cannot parse base class visibility."); + base_class.visibility = (StabsFieldVisibility) *visibility; + + std::optional offset = eat_s32_literal(input); + CCC_CHECK(offset.has_value(), "Cannot parse base class offset."); + base_class.offset = (s32) *offset; + + CCC_EXPECT_CHAR(input, ',', "base class section"); + + auto base_class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(base_class_type); + base_class.type = std::move(*base_class_type); + + CCC_EXPECT_CHAR(input, ';', "base class section"); struct_type->base_classes.emplace_back(std::move(base_class)); } } - struct_type->fields = parse_field_list(input); - struct_type->member_functions = parse_member_functions(input); + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + 
struct_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + struct_type->member_functions = std::move(*member_functions); + STABS_DEBUG_PRINTF("}\n"); - type = std::move(struct_type); + out_type = std::move(struct_type); break; } case StabsTypeDescriptor::UNION: { // u auto union_type = std::make_unique(info); STABS_DEBUG_PRINTF("union {\n"); - union_type->size = eat_s64_literal(input); - union_type->fields = parse_field_list(input); - union_type->member_functions = parse_member_functions(input); + + std::optional union_size = eat_s64_literal(input); + CCC_CHECK(union_size.has_value(), "Cannot parse struct size."); + union_type->size = *union_size; + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + union_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + union_type->member_functions = std::move(*member_functions); + STABS_DEBUG_PRINTF("}\n"); - type = std::move(union_type); + out_type = std::move(union_type); break; } case StabsTypeDescriptor::CROSS_REFERENCE: { // x auto cross_reference = std::make_unique(info); - switch(eat_char(input)) { + + std::optional c = eat_char(input); + CCC_CHECK(c.has_value(), "Cannot parse cross reference type."); + + switch(*c) { case 'e': cross_reference->type = StabsCrossReferenceType::ENUM; break; case 's': cross_reference->type = StabsCrossReferenceType::STRUCT; break; case 'u': cross_reference->type = StabsCrossReferenceType::UNION; break; break; default: - verify_not_reached("Invalid cross reference type '%c'.", - cross_reference->type); + return CCC_FAILURE("invalid cross reference type '%c'", cross_reference->type); } - cross_reference->identifier = eat_dodgy_stabs_identifier(input); + + std::optional identifier = eat_dodgy_stabs_identifier(input); + CCC_CHECK(identifier.has_value(), "Cannot parse cross reference identifier."); + 
cross_reference->identifier = std::move(*identifier); + cross_reference->name = cross_reference->identifier; - expect_char(input, ':', "cross reference"); - type = std::move(cross_reference); + CCC_EXPECT_CHAR(input, ':', "cross reference"); + out_type = std::move(cross_reference); break; } case StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { auto fp_builtin = std::make_unique(info); - fp_builtin->fpclass = (s32) eat_s64_literal(input); - expect_char(input, ';', "floating point builtin"); - fp_builtin->bytes = (s32) eat_s64_literal(input); - expect_char(input, ';', "floating point builtin"); - type = std::move(fp_builtin); + + std::optional fpclass = eat_s32_literal(input); + CCC_CHECK(fpclass.has_value(), "Cannot parse floating point built-in class."); + fp_builtin->fpclass = *fpclass; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + std::optional bytes = eat_s32_literal(input); + CCC_CHECK(bytes.has_value(), "Cannot parse floating point built-in."); + fp_builtin->bytes = *bytes; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + out_type = std::move(fp_builtin); break; } case StabsTypeDescriptor::METHOD: { // # auto method = std::make_unique(info); if(*input == '#') { input++; - method->return_type = parse_stabs_type(input); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + method->return_type = std::move(*return_type); + if(*input == ';') { input++; } } else { - method->class_type = parse_stabs_type(input); - expect_char(input, ',', "method"); - method->return_type = parse_stabs_type(input); + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + method->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "method"); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + method->return_type = std::move(*return_type); + while(*input != '\0') { if(*input == ';') { input++; break; } - expect_char(input, ',', "method"); - 
method->parameter_types.emplace_back(parse_stabs_type(input)); + CCC_EXPECT_CHAR(input, ',', "method"); + + auto parameter_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(parameter_type); + method->parameter_types.emplace_back(std::move(*parameter_type)); } } - type = std::move(method); + out_type = std::move(method); break; } case StabsTypeDescriptor::REFERENCE: { // & auto reference = std::make_unique(info); - reference->value_type = parse_stabs_type(input); - type = std::move(reference); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + reference->value_type = std::move(*value_type); + + out_type = std::move(reference); break; } case StabsTypeDescriptor::POINTER: { // * auto pointer = std::make_unique(info); - pointer->value_type = parse_stabs_type(input); - type = std::move(pointer); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + pointer->value_type = std::move(*value_type); + + out_type = std::move(pointer); break; } case StabsTypeDescriptor::TYPE_ATTRIBUTE: { // @ if((*input >= '0' && *input <= '9') || *input == '(') { auto member_pointer = std::make_unique(info); - member_pointer->class_type = parse_stabs_type(input); - expect_char(input, ',', "pointer to non-static data member"); - member_pointer->member_type = parse_stabs_type(input); - type = std::move(member_pointer); + + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + member_pointer->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "pointer to non-static data member"); + + auto member_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(member_type); + member_pointer->member_type = std::move(*member_type); + + out_type = std::move(member_pointer); } else { auto type_attribute = std::make_unique(info); - verify(*input == 's', "Weird value following '@' type descriptor. 
Please submit a bug report!"); + CCC_CHECK(*input == 's', "Weird value following '@' type descriptor."); input++; - type_attribute->size_bits = eat_s64_literal(input); - expect_char(input, ';', "type attribute"); - type_attribute->type = parse_stabs_type(input); - type = std::move(type_attribute); + + std::optional size_bits = eat_s64_literal(input); + CCC_CHECK(size_bits.has_value(), "Cannot parse type attribute.") + type_attribute->size_bits = *size_bits; + CCC_EXPECT_CHAR(input, ';', "type attribute"); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_attribute->type = std::move(*type); + + out_type = std::move(type_attribute); } break; } case StabsTypeDescriptor::BUILTIN: { // - auto built_in = std::make_unique(info); - built_in->type_id = eat_s64_literal(input); - type = std::move(built_in); + std::optional type_id = eat_s64_literal(input); + CCC_CHECK(type_id.has_value(), "Cannot parse built-in."); + built_in->type_id = *type_id; + out_type = std::move(built_in); break; } default: { - verify_not_reached("Invalid type descriptor '%c' (%02x). Please file a bug report!", + return CCC_FAILURE( + "Invalid type descriptor '%c' (%02x). 
Please file a bug report!", (u32) descriptor, (u32) descriptor); } } - return type; + + return out_type; } -static std::vector parse_field_list(const char*& input) { +static Result> parse_field_list(const char*& input) { std::vector fields; + while(*input != '\0') { if(*input == ';') { input++; @@ -266,11 +398,19 @@ static std::vector parse_field_list(const char*& input) { const char* before_field = input; StabsField field; - field.name = eat_dodgy_stabs_identifier(input); - expect_char(input, ':', "identifier"); + + std::optional name = eat_dodgy_stabs_identifier(input); + CCC_CHECK(name.has_value(), "Cannot parse field name."); + field.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "identifier"); if(*input == '/') { input++; - field.visibility = (StabsFieldVisibility) eat_char(input); + + std::optional visibility = eat_char(input); + CCC_CHECK(visibility.has_value(), "Cannot parse field visibility."); + field.visibility = (StabsFieldVisibility) *visibility; + switch(field.visibility) { case StabsFieldVisibility::NONE: case StabsFieldVisibility::PRIVATE: @@ -279,47 +419,67 @@ static std::vector parse_field_list(const char*& input) { case StabsFieldVisibility::PUBLIC_OPTIMIZED_OUT: break; default: - verify_not_reached("Invalid field visibility."); + return CCC_FAILURE("invalid field visibility"); } } if(*input == ':') { input = before_field; break; } - field.type = parse_stabs_type(input); + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + field.type = std::move(*type); + if(field.name.size() >= 1 && field.name[0] == '$') { // Virtual table pointers. 
- expect_char(input, ',', "field type"); - field.offset_bits = eat_s64_literal(input); - expect_char(input, ';', "field offset"); + CCC_EXPECT_CHAR(input, ',', "field type"); + + std::optional offset_bits = eat_s32_literal(input); + CCC_CHECK(offset_bits.has_value(), "Cannot parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ';', "field offset"); } else if(*input == ':') { input++; field.is_static = true; - field.type_name = eat_dodgy_stabs_identifier(input); - expect_char(input, ';', "identifier"); + + std::optional type_name = eat_dodgy_stabs_identifier(input); + CCC_CHECK(type_name.has_value(), "Cannot parse static field type name."); + field.type_name = std::move(*type_name); + + CCC_EXPECT_CHAR(input, ';', "identifier"); } else if(*input == ',') { input++; - field.offset_bits = eat_s64_literal(input); - expect_char(input, ',', "field offset"); - field.size_bits = eat_s64_literal(input); - expect_char(input, ';', "field size"); + + std::optional offset_bits = eat_s32_literal(input); + CCC_CHECK(offset_bits.has_value(), "Cannot parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ',', "field offset"); + + std::optional size_bits = eat_s32_literal(input); + CCC_CHECK(size_bits.has_value(), "Cannot parse field size."); + field.size_bits = *size_bits; + + CCC_EXPECT_CHAR(input, ';', "field size"); } else { - verify_not_reached("Expected ':' or ',', got '%c' (%hhx).", *input, *input); + return CCC_FAILURE("Expected ':' or ',', got '%c' (%hhx).", *input, *input); } STABS_DEBUG(print_field(field);) fields.emplace_back(std::move(field)); } + return fields; } -static std::vector parse_member_functions(const char*& input) { +static Result> parse_member_functions(const char*& input) { // Check for if the next character is from an enclosing field list. If this // is the case, the next character will be ',' for normal fields and ':' for // static fields (see above). 
if(*input == ',' || *input == ':') { - return {}; + return std::vector(); } std::vector member_functions; @@ -330,9 +490,13 @@ static std::vector parse_member_functions(const char*& i } const char* before = input; StabsMemberFunctionSet member_function_set; - member_function_set.name = eat_stabs_identifier(input); - expect_char(input, ':', "member function"); - expect_char(input, ':', "member function"); + + std::optional name = eat_stabs_identifier(input); + CCC_CHECK(name.has_value(), "Cannot parse member function name."); + member_function_set.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "member function"); + CCC_EXPECT_CHAR(input, ':', "member function"); while(*input != '\0') { if(*input == ';') { input++; @@ -340,12 +504,19 @@ static std::vector parse_member_functions(const char*& i } StabsMemberFunction function; - function.type = parse_stabs_type(input); - expect_char(input, ':', "member function"); + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + function.type = std::move(*type); + + CCC_EXPECT_CHAR(input, ':', "member function"); eat_dodgy_stabs_identifier(input); - expect_char(input, ';', "member function"); - function.visibility = (StabsFieldVisibility) eat_char(input); + CCC_EXPECT_CHAR(input, ';', "member function"); + + std::optional visibility = eat_char(input); + CCC_CHECK(visibility.has_value(), "Cannot parse member function visibility."); + function.visibility = (StabsFieldVisibility) *visibility; + switch(function.visibility) { case StabsFieldVisibility::PRIVATE: case StabsFieldVisibility::PROTECTED: @@ -353,9 +524,12 @@ static std::vector parse_member_functions(const char*& i case StabsFieldVisibility::PUBLIC_OPTIMIZED_OUT: break; default: - verify_not_reached("Invalid visibility for member function."); + return CCC_FAILURE("Invalid visibility for member function."); } - switch(eat_char(input)) { + + std::optional modifiers = eat_char(input); + CCC_CHECK(modifiers.has_value(), "Cannot parse member function 
modifiers."); + switch(*modifiers) { case 'A': function.is_const = false; function.is_volatile = false; @@ -376,24 +550,36 @@ static std::vector parse_member_functions(const char*& i case '.': break; default: - verify_not_reached("Invalid member function modifiers."); + return CCC_FAILURE("Invalid member function modifiers."); } - switch(eat_char(input)) { - case '.': // normal member function + + std::optional flag = eat_char(input); + CCC_CHECK(flag.has_value(), "Cannot parse member function type."); + switch(*flag) { + case '.': { // normal member function function.modifier = MemberFunctionModifier::NONE; break; - case '?': // static member function + } + case '?': { // static member function function.modifier = MemberFunctionModifier::STATIC; break; - case '*': // virtual member function - function.vtable_index = eat_s64_literal(input); - expect_char(input, ';', "virtual member function"); - parse_stabs_type(input); - expect_char(input, ';', "virtual member function"); + } + case '*': { // virtual member function + std::optional vtable_index = eat_s32_literal(input); + CCC_CHECK(vtable_index.has_value(), "Cannot parse vtable index."); + function.vtable_index = *vtable_index; + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); + + auto virtual_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(virtual_type); + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); function.modifier = MemberFunctionModifier::VIRTUAL; break; + } default: - verify_not_reached("Invalid member function type."); + return CCC_FAILURE("Invalid member function type."); } member_function_set.overloads.emplace_back(std::move(function)); } @@ -458,28 +644,34 @@ static BuiltInClass classify_range(const std::string& low, const std::string& hi return BuiltInClass::UNKNOWN_PROBABLY_ARRAY; } -char eat_char(const char*& input) { - verify(*input != '\0', ERR_END_OF_SYMBOL); +std::optional eat_char(const char*& input) { + if(*input == '\0') { + return std::nullopt; + } return 
*(input++); } -s64 eat_s64_literal(const char*& input) { - std::string number; - if(*input == '-') { - number = "-"; - input++; +std::optional eat_s32_literal(const char*& input) { + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; } - for(; *input != '\0'; input++) { - if(*input < '0' || *input > '9') { - break; - } - number += *input; + input = end; + return (s32) value; +} + +std::optional eat_s64_literal(const char*& input) { + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; } - verify(number.size() > 0, "Unexpected '%c' (%02hhx).", *input, *input); - return strtoll(number.c_str(), nullptr, 10); + input = end; + return value; } -std::string eat_stabs_identifier(const char*& input) { +std::optional eat_stabs_identifier(const char*& input) { std::string identifier; bool first = true; for(; *input != '\0'; input++) { @@ -493,12 +685,12 @@ std::string eat_stabs_identifier(const char*& input) { } first = false; } - verify_not_reached(ERR_END_OF_SYMBOL); + return std::nullopt; } // The complexity here is because the input may contain an unescaped namespace // separator '::' even if the field terminator is supposed to be a colon. 
-std::string eat_dodgy_stabs_identifier(const char*& input) { +std::optional eat_dodgy_stabs_identifier(const char*& input) { std::string identifier; bool first = true; s32 template_depth = 0; @@ -519,14 +711,7 @@ std::string eat_dodgy_stabs_identifier(const char*& input) { } first = false; } - verify_not_reached(ERR_END_OF_SYMBOL); -} - - -void expect_char(const char*& input, char expected, const char* subject) { - verify(*input != '\0', ERR_END_OF_SYMBOL); - char val = *(input++); - verify(val == expected, "Expected '%c' in %s, got '%c'.", expected, subject, val); + return std::nullopt; } static void print_field(const StabsField& field) { diff --git a/ccc/stabs.h b/ccc/stabs.h index c884c995..4d6ddd5a 100644 --- a/ccc/stabs.h +++ b/ccc/stabs.h @@ -6,9 +6,6 @@ namespace ccc { -static const char* ERR_END_OF_SYMBOL = - "Unexpected end of input while parsing symbol."; - enum class StabsTypeDescriptor : u8 { TYPE_REFERENCE = 0xef, // '0'..'9','(' ARRAY = 'a', @@ -43,11 +40,24 @@ struct StabsBaseClass; struct StabsField; struct StabsMemberFunctionSet; +// These are used to reference STABS types from other types within a single +// translation unit. For most games these will just be a single number, the type +// number. In some cases, for example with the homebrew SDK, type numbers are a +// pair of two numbers surrounded by round brackets e.g. (1,23) where the first +// number is the index of the include file to use (includes are listed for each +// translation unit separately), and the second number is the type number. +struct StabsTypeNumber { + s32 file = -1; + s32 type = -1; + + friend auto operator<=>(const StabsTypeNumber& lhs, const StabsTypeNumber& rhs) = default; +}; + // Fields to be filled in before the per-descriptor code that actually allocates // the stab runs. 
struct StabsTypeInfo { bool anonymous = false; - s64 type_number = -1; + StabsTypeNumber type_number; bool has_body = false; }; @@ -67,12 +77,18 @@ struct StabsType : StabsTypeInfo { virtual ~StabsType() {} template - SubType& as() { assert(descriptor == SubType::DESCRIPTOR); return *static_cast(this); } + SubType& as() { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } template - const SubType& as() const { assert(descriptor == SubType::DESCRIPTOR); return *static_cast(this); } + const SubType& as() const { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } - virtual void enumerate_numbered_types(std::map& output) const { + virtual void enumerate_numbered_types(std::map& output) const { if(!anonymous && has_body) { output.emplace(type_number, this); } @@ -129,7 +145,7 @@ struct StabsTypeReferenceType : StabsType { StabsTypeReferenceType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_REFERENCE; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); type->enumerate_numbered_types(output); } @@ -142,7 +158,7 @@ struct StabsArrayType : StabsType { StabsArrayType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ARRAY; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); index_type->enumerate_numbered_types(output); element_type->enumerate_numbered_types(output); @@ -162,7 +178,7 @@ struct StabsFunctionType : StabsType { StabsFunctionType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FUNCTION; - void 
enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); return_type->enumerate_numbered_types(output); } @@ -174,7 +190,7 @@ struct StabsVolatileQualifierType : StabsType { StabsVolatileQualifierType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::VOLATILE_QUALIFIER; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); type->enumerate_numbered_types(output); } @@ -186,7 +202,7 @@ struct StabsConstQualifierType : StabsType { StabsConstQualifierType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CONST_QUALIFIER; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); type->enumerate_numbered_types(output); } @@ -201,7 +217,7 @@ struct StabsRangeType : StabsType { StabsRangeType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::RANGE; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); type->enumerate_numbered_types(output); } @@ -215,7 +231,7 @@ struct StabsStructOrUnionType : StabsType { StabsStructOrUnionType(const StabsTypeInfo& i, StabsTypeDescriptor d) : StabsType(i, d) {} - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); for(const StabsBaseClass& base_class : base_classes) { 
base_class.type->enumerate_numbered_types(output); @@ -270,7 +286,7 @@ struct StabsMethodType : StabsType { StabsMethodType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::METHOD; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); return_type->enumerate_numbered_types(output); if(class_type.has_value()) { @@ -288,7 +304,7 @@ struct StabsReferenceType : StabsType { StabsReferenceType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::REFERENCE; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); value_type->enumerate_numbered_types(output); } @@ -300,7 +316,7 @@ struct StabsPointerType : StabsType { StabsPointerType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); value_type->enumerate_numbered_types(output); } @@ -313,7 +329,7 @@ struct StabsSizeTypeAttributeType : StabsType { StabsSizeTypeAttributeType(const StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_ATTRIBUTE; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); type->enumerate_numbered_types(output); } @@ -326,7 +342,7 @@ struct StabsPointerToNonStaticDataMember : StabsType { StabsPointerToNonStaticDataMember(const 
StabsTypeInfo& i) : StabsType(i, DESCRIPTOR) {} static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER_TO_NON_STATIC_MEMBER; - void enumerate_numbered_types(std::map& output) const override { + void enumerate_numbered_types(std::map& output) const override { StabsType::enumerate_numbered_types(output); class_type->enumerate_numbered_types(output); member_type->enumerate_numbered_types(output); @@ -340,12 +356,12 @@ struct StabsBuiltInType : StabsType { static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::BUILTIN; }; -std::unique_ptr parse_stabs_type(const char*& input); -char eat_char(const char*& input); -s64 eat_s64_literal(const char*& input); -std::string eat_stabs_identifier(const char*& input); -std::string eat_dodgy_stabs_identifier(const char*& input); -void expect_char(const char*& input, char expected, const char* subject); +Result> parse_stabs_type(const char*& input); +std::optional eat_char(const char*& input); +std::optional eat_s32_literal(const char*& input); +std::optional eat_s64_literal(const char*& input); +std::optional eat_stabs_identifier(const char*& input); +std::optional eat_dodgy_stabs_identifier(const char*& input); const char* builtin_class_to_string(BuiltInClass bclass); s32 builtin_class_size(BuiltInClass bclass); const char* stabs_field_visibility_to_string(StabsFieldVisibility visibility); diff --git a/ccc/stabs_to_ast.cpp b/ccc/stabs_to_ast.cpp new file mode 100644 index 00000000..1c39d717 --- /dev/null +++ b/ccc/stabs_to_ast.cpp @@ -0,0 +1,446 @@ +#include "stabs_to_ast.h" + +#define AST_DEBUG(...) //__VA_ARGS__ +#define AST_DEBUG_PRINTF(...) 
AST_DEBUG(printf(__VA_ARGS__);) + +namespace ccc { + +static bool detect_bitfield(const StabsField& field, const StabsToAstState& state); + +Result> stabs_symbol_to_ast(const ParsedSymbol& symbol, const StabsToAstState& state) { + AST_DEBUG_PRINTF("ANALYSING %s\n", symbol.raw->string); + auto node = stabs_type_to_ast_and_handle_errors(*symbol.name_colon_type.type.get(), state, 0, 0, false, false); + node->name = (symbol.name_colon_type.name == " ") ? "" : symbol.name_colon_type.name; + node->symbol = &symbol; + if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME) { + node->storage_class = ast::SC_TYPEDEF; + } + return node; +} + +std::unique_ptr stabs_type_to_ast_and_handle_errors(const StabsType& type, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute) { + Result> node = stabs_type_to_ast(type, state, abs_parent_offset_bytes, depth, substitute_type_name, false); + if(!node.success()) { + auto error = std::make_unique(); + error->source = ast::TypeNameSource::ERROR; + error->type_name = std::string("/* ERROR: ") + node.error().message + " */"; + return std::unique_ptr(std::move(error)); + } + return std::move(*node); +} + +Result> stabs_type_to_ast(const StabsType& type, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute) { + AST_DEBUG_PRINTF("%-*stype desc=%hhx '%c' num=%d name=%s\n", + depth * 4, "", + (u8) type.descriptor, + isprint((u8) type.descriptor) ? (u8) type.descriptor : '!', + type.type_number, + type.name.has_value() ? type.name->c_str() : ""); + + CCC_CHECK(depth <= 200, "Call depth greater than 200 in stabs_type_to_ast, probably infinite recursion.") + + // This makes sure that types are replaced with their type name in cases + // where that would be more appropriate. 
+ if(type.name.has_value()) { + bool try_substitute = depth > 0 && (type.is_root + || type.descriptor == StabsTypeDescriptor::RANGE + || type.descriptor == StabsTypeDescriptor::BUILTIN); + bool is_name_empty = type.name == "" || type.name == " "; + // Unfortunately, a common case seems to be that __builtin_va_list is + // indistinguishable from void*, so we prevent it from being output to + // avoid confusion. + bool is_va_list = type.name == "__builtin_va_list"; + if((substitute_type_name || try_substitute) && !is_name_empty && !is_va_list) { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->type_name = *type.name; + type_name->referenced_file_index = state.file_index; + type_name->referenced_stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + } + + // This prevents infinite recursion when an automatically generated member + // function references an unnamed type. + if(force_substitute) { + const char* type_string = nullptr; + if(type.descriptor == StabsTypeDescriptor::ENUM) type_string = "__unnamed_enum"; + if(type.descriptor == StabsTypeDescriptor::STRUCT) type_string = "__unnamed_struct"; + if(type.descriptor == StabsTypeDescriptor::UNION) type_string = "__unnamed_union"; + if(type_string) { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->type_name = type_string; + type_name->referenced_file_index = state.file_index; + type_name->referenced_stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + } + + if(!type.has_body) { + // The definition of the type has been defined previously, so we have to + // look it up by its type number. 
+ auto stabs_type = state.stabs_types->find(type.type_number); + if(type.anonymous || stabs_type == state.stabs_types->end()) { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::ERROR; + type_name->type_name = stringf("CCC_BADTYPELOOKUP(%d)", type.type_number); + return std::unique_ptr(std::move(type_name)); + } + return stabs_type_to_ast(*stabs_type->second, state, abs_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); + } + + std::unique_ptr result; + + switch(type.descriptor) { + case StabsTypeDescriptor::TYPE_REFERENCE: { + const auto& stabs_type_ref = type.as(); + if(type.anonymous | stabs_type_ref.type->anonymous || stabs_type_ref.type->type_number != type.type_number) { + auto node = stabs_type_to_ast(*stabs_type_ref.type, state, abs_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + } else { + // I still don't know why in STABS void is a reference to + // itself, maybe because I'm not a philosopher. + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->type_name = "void"; + result = std::move(type_name); + } + break; + } + case StabsTypeDescriptor::ARRAY: { + auto array = std::make_unique(); + const auto& stabs_array = type.as(); + + auto element_node = stabs_type_to_ast(*stabs_array.element_type, state, abs_parent_offset_bytes, depth + 1, true, force_substitute);; + CCC_RETURN_IF_ERROR(element_node); + array->element_type = std::move(*element_node); + + const auto& index = stabs_array.index_type->as(); + // The low and high values are not wrong in this case. 
+ CCC_CHECK(index.low_maybe_wrong == 0, "Invalid index type for array."); + array->element_count = index.high_maybe_wrong + 1; + result = std::move(array); + break; + } + case StabsTypeDescriptor::ENUM: { + auto inline_enum = std::make_unique(); + const auto& stabs_enum = type.as(); + inline_enum->constants = stabs_enum.fields; + result = std::move(inline_enum); + break; + } + case StabsTypeDescriptor::FUNCTION: { + auto function = std::make_unique(); + + auto node = stabs_type_to_ast(*type.as().return_type, state, abs_parent_offset_bytes, depth + 1, true, force_substitute); + CCC_RETURN_IF_ERROR(node); + function->return_type = std::move(*node); + + result = std::move(function); + break; + } + case StabsTypeDescriptor::VOLATILE_QUALIFIER: { + const auto& volatile_qualifier = type.as(); + + auto node = stabs_type_to_ast(*volatile_qualifier.type.get(), state, abs_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->is_volatile = true; + break; + } + case StabsTypeDescriptor::CONST_QUALIFIER: { + const auto& const_qualifier = type.as(); + + auto node = stabs_type_to_ast(*const_qualifier.type.get(), state, abs_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); + result = std::move(*node); + + result->is_const = true; + break; + } + case StabsTypeDescriptor::RANGE: { + auto builtin = std::make_unique(); + builtin->bclass = type.as().range_class; + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::STRUCT: + case StabsTypeDescriptor::UNION: { + const StabsStructOrUnionType* stabs_struct_or_union; + if(type.descriptor == StabsTypeDescriptor::STRUCT) { + stabs_struct_or_union = &type.as(); + } else { + stabs_struct_or_union = &type.as(); + } + auto struct_or_union = std::make_unique(); + struct_or_union->is_struct = type.descriptor == StabsTypeDescriptor::STRUCT; + struct_or_union->size_bits = (s32) stabs_struct_or_union->size * 8; + for(const 
StabsBaseClass& stabs_base_class : stabs_struct_or_union->base_classes) { + auto base_class = stabs_type_to_ast(*stabs_base_class.type, state, abs_parent_offset_bytes, depth + 1, true, force_substitute); + CCC_RETURN_IF_ERROR(base_class); + + (*base_class)->is_base_class = true; + (*base_class)->absolute_offset_bytes = stabs_base_class.offset; + (*base_class)->access_specifier = stabs_field_visibility_to_access_specifier(stabs_base_class.visibility); + + struct_or_union->base_classes.emplace_back(std::move(*base_class)); + } + AST_DEBUG_PRINTF("%-*s beginfields\n", depth * 4, ""); + for(const StabsField& field : stabs_struct_or_union->fields) { + auto node = stabs_field_to_ast(field, state, abs_parent_offset_bytes, depth); + CCC_RETURN_IF_ERROR(node); + struct_or_union->fields.emplace_back(std::move(*node)); + } + AST_DEBUG_PRINTF("%-*s endfields\n", depth * 4, ""); + AST_DEBUG_PRINTF("%-*s beginmemberfuncs\n", depth * 4, ""); + std::string struct_or_union_name_no_template_parameters; + if(type.name.has_value()) { + struct_or_union_name_no_template_parameters = + type.name->substr(0, type.name->find("<")); + } + for(const StabsMemberFunctionSet& function_set : stabs_struct_or_union->member_functions) { + for(const StabsMemberFunction& stabs_func : function_set.overloads) { + auto node = stabs_type_to_ast(*stabs_func.type, state, abs_parent_offset_bytes, depth + 1, true, true); + CCC_RETURN_IF_ERROR(node); + if(function_set.name == "__as") { + (*node)->name = "operator="; + } else { + (*node)->name = function_set.name; + } + if((*node)->descriptor == ast::FUNCTION_TYPE) { + ast::FunctionType& function = (*node)->as(); + function.modifier = stabs_func.modifier; + function.is_constructor = false; + if(type.name.has_value()) { + function.is_constructor |= function_set.name == type.name; + function.is_constructor |= function_set.name == struct_or_union_name_no_template_parameters; + } + function.vtable_index = stabs_func.vtable_index; + } + (*node)->access_specifier = 
stabs_field_visibility_to_access_specifier(stabs_func.visibility); + struct_or_union->member_functions.emplace_back(std::move(*node)); + } + } + AST_DEBUG_PRINTF("%-*s endmemberfuncs\n", depth * 4, ""); + result = std::move(struct_or_union); + break; + } + case StabsTypeDescriptor::CROSS_REFERENCE: { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::CROSS_REFERENCE; + type_name->type_name = type.as().identifier; + result = std::move(type_name); + break; + } + case ccc::StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { + const auto& fp_builtin = type.as(); + auto builtin = std::make_unique(); + switch(fp_builtin.bytes) { + case 1: builtin->bclass = BuiltInClass::UNSIGNED_8; break; + case 2: builtin->bclass = BuiltInClass::UNSIGNED_16; break; + case 4: builtin->bclass = BuiltInClass::UNSIGNED_32; break; + case 8: builtin->bclass = BuiltInClass::UNSIGNED_64; break; + case 16: builtin->bclass = BuiltInClass::UNSIGNED_128; break; + default: builtin->bclass = BuiltInClass::UNSIGNED_8; break; + } + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::METHOD: { + const auto& stabs_method = type.as(); + auto function = std::make_unique(); + + auto return_node = stabs_type_to_ast(*stabs_method.return_type.get(), state, abs_parent_offset_bytes, depth + 1, true, true); + CCC_RETURN_IF_ERROR(return_node); + function->return_type = std::move(*return_node); + + function->parameters.emplace(); + for(const std::unique_ptr& parameter_type : stabs_method.parameter_types) { + auto parameter_node = stabs_type_to_ast(*parameter_type, state, abs_parent_offset_bytes, depth + 1, true, true); + CCC_RETURN_IF_ERROR(parameter_node); + function->parameters->emplace_back(std::move(*parameter_node)); + } + result = std::move(function); + break; + } + case StabsTypeDescriptor::POINTER: { + auto pointer = std::make_unique(); + + auto value_node = stabs_type_to_ast(*type.as().value_type, state, abs_parent_offset_bytes, depth + 1, true, 
force_substitute); + CCC_RETURN_IF_ERROR(value_node); + pointer->value_type = std::move(*value_node); + + result = std::move(pointer); + break; + } + case StabsTypeDescriptor::REFERENCE: { + auto reference = std::make_unique(); + + auto value_node = stabs_type_to_ast(*type.as().value_type, state, abs_parent_offset_bytes, depth + 1, true, force_substitute); + CCC_RETURN_IF_ERROR(value_node); + reference->value_type = std::move(*value_node); + + result = std::move(reference); + break; + } + case StabsTypeDescriptor::TYPE_ATTRIBUTE: { + const auto& stabs_type_attribute = type.as(); + + auto node = stabs_type_to_ast(*stabs_type_attribute.type, state, abs_parent_offset_bytes, depth + 1, substitute_type_name, force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->size_bits = stabs_type_attribute.size_bits; + break; + } + case StabsTypeDescriptor::POINTER_TO_NON_STATIC_MEMBER: { + const auto& stabs_member_pointer = type.as(); + auto member_pointer = std::make_unique(); + + auto class_node = stabs_type_to_ast(*stabs_member_pointer.class_type.get(), state, abs_parent_offset_bytes, depth + 1, true, true); + CCC_RETURN_IF_ERROR(class_node); + member_pointer->class_type = std::move(*class_node); + + auto member_node = stabs_type_to_ast(*stabs_member_pointer.member_type.get(), state, abs_parent_offset_bytes, depth + 1, true, true); + CCC_RETURN_IF_ERROR(member_node); + member_pointer->member_type = std::move(*member_node); + + result = std::move(member_pointer); + break; + } + case StabsTypeDescriptor::BUILTIN: { + CCC_CHECK(type.as().type_id == 16, + "Unknown built-in type! 
Please file a bug report."); + auto builtin = std::make_unique(); + builtin->bclass = BuiltInClass::BOOL_8; + result = std::move(builtin); + break; + } + } + CCC_ASSERT(result); + return result; +} + +Result> stabs_field_to_ast(const StabsField& field, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth) { + AST_DEBUG_PRINTF("%-*s field %s\n", depth * 4, "", field.name.c_str()); + + if(detect_bitfield(field, state)) { + // Process bitfields. + s32 relative_offset_bytes = field.offset_bits / 8; + s32 absolute_offset_bytes = abs_parent_offset_bytes + relative_offset_bytes; + auto bitfield_node = stabs_type_to_ast(*field.type, state, absolute_offset_bytes, depth + 1, true, false); + + std::unique_ptr bitfield = std::make_unique(); + bitfield->name = (field.name == " ") ? "" : field.name; + bitfield->relative_offset_bytes = relative_offset_bytes; + bitfield->absolute_offset_bytes = absolute_offset_bytes; + bitfield->size_bits = field.size_bits; + bitfield->underlying_type = std::move(*bitfield_node); + bitfield->bitfield_offset_bits = field.offset_bits % 8; + if(field.is_static) { + bitfield->storage_class = ast::SC_STATIC; + } + bitfield->access_specifier = stabs_field_visibility_to_access_specifier(field.visibility); + return std::unique_ptr(std::move(bitfield)); + } + + // Process a normal field. + s32 relative_offset_bytes = field.offset_bits / 8; + s32 absolute_offset_bytes = abs_parent_offset_bytes + relative_offset_bytes; + Result> node = stabs_type_to_ast(*field.type, state, absolute_offset_bytes, depth + 1, true, false); + CCC_RETURN_IF_ERROR(node); + (*node)->name = (field.name == " ") ? 
"" : field.name; + (*node)->relative_offset_bytes = relative_offset_bytes; + (*node)->absolute_offset_bytes = absolute_offset_bytes; + (*node)->size_bits = field.size_bits; + if(field.is_static) { + (*node)->storage_class = ast::SC_STATIC; + } + (*node)->access_specifier = stabs_field_visibility_to_access_specifier(field.visibility); + return node; +} + +static bool detect_bitfield(const StabsField& field, const StabsToAstState& state) { + // Static fields can't be bitfields. + if(field.is_static) { + return false; + } + + // Resolve type references. + const StabsType* type = field.type.get(); + for(s32 i = 0; i < 50; i++) { + if(!type->has_body) { + if(type->anonymous) { + return false; + } + auto next_type = state.stabs_types->find(type->type_number); + if(next_type == state.stabs_types->end() || next_type->second == type) { + return false; + } + type = next_type->second; + } else if(type->descriptor == StabsTypeDescriptor::TYPE_REFERENCE) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::CONST_QUALIFIER) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::VOLATILE_QUALIFIER) { + type = type->as().type.get(); + } else { + break; + } + + // Prevent an infinite loop if there's a cycle (fatal frame). + if(i == 49) { + return false; + } + } + + // Determine the size of the underlying type. 
+ s32 underlying_type_size_bits = 0; + switch(type->descriptor) { + case ccc::StabsTypeDescriptor::RANGE: { + underlying_type_size_bits = builtin_class_size(type->as().range_class) * 8; + break; + } + case ccc::StabsTypeDescriptor::CROSS_REFERENCE: { + if(type->as().type == StabsCrossReferenceType::ENUM) { + underlying_type_size_bits = 32; + } else { + return false; + } + break; + } + case ccc::StabsTypeDescriptor::TYPE_ATTRIBUTE: { + underlying_type_size_bits = type->as().size_bits; + break; + } + case ccc::StabsTypeDescriptor::BUILTIN: { + underlying_type_size_bits = 8; // bool + break; + } + default: { + return false; + } + } + + if(underlying_type_size_bits == 0) { + return false; + } + + return field.size_bits != underlying_type_size_bits; +} + +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsFieldVisibility visibility) { + ast::AccessSpecifier access_specifier = ast::AS_PUBLIC; + switch(visibility) { + case ccc::StabsFieldVisibility::NONE: access_specifier = ast::AS_PUBLIC; break; + case ccc::StabsFieldVisibility::PUBLIC: access_specifier = ast::AS_PUBLIC; break; + case ccc::StabsFieldVisibility::PROTECTED: access_specifier = ast::AS_PROTECTED; break; + case ccc::StabsFieldVisibility::PRIVATE: access_specifier = ast::AS_PRIVATE; break; + case ccc::StabsFieldVisibility::PUBLIC_OPTIMIZED_OUT: access_specifier = ast::AS_PUBLIC; break; + } + return access_specifier; +} + +} diff --git a/ccc/stabs_to_ast.h b/ccc/stabs_to_ast.h new file mode 100644 index 00000000..d23da3ec --- /dev/null +++ b/ccc/stabs_to_ast.h @@ -0,0 +1,21 @@ +#ifndef _CCC_STABS_TO_AST_H +#define _CCC_STABS_TO_AST_H + +#include "ast.h" +#include "stabs.h" + +namespace ccc { + +struct StabsToAstState { + s32 file_index; + std::map* stabs_types; +}; +std::unique_ptr stabs_type_to_ast_and_handle_errors(const StabsType& type, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute); +Result> stabs_symbol_to_ast(const 
ParsedSymbol& symbol, const StabsToAstState& state); +Result> stabs_type_to_ast(const StabsType& type, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth, bool substitute_type_name, bool force_substitute); +Result> stabs_field_to_ast(const StabsField& field, const StabsToAstState& state, s32 abs_parent_offset_bytes, s32 depth); +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsFieldVisibility visibility); + +} + +#endif diff --git a/ccc/symbols.cpp b/ccc/symbols.cpp index 08f573fc..3bd35b28 100644 --- a/ccc/symbols.cpp +++ b/ccc/symbols.cpp @@ -5,9 +5,9 @@ namespace ccc { #define SYMBOLS_DEBUG(...) //__VA_ARGS__ #define SYMBOLS_DEBUG_PRINTF(...) SYMBOLS_DEBUG(printf(__VA_ARGS__);) -static void validate_symbol_descriptor(StabsSymbolDescriptor descriptor); +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor); -std::vector parse_symbols(const std::vector& input, mdebug::SourceLanguage detected_language) { +Result> parse_symbols(const std::vector& input, mdebug::SourceLanguage detected_language) { std::vector output; std::string prefix; for(const mdebug::Symbol& symbol : input) { @@ -27,12 +27,13 @@ std::vector parse_symbols(const std::vector& input } else { std::string symbol_string = prefix + symbol.string; prefix.clear(); - ParsedSymbol stabs_symbol = parse_stabs_type_symbol(symbol_string.c_str()); - stabs_symbol.raw = &symbol; - output.emplace_back(std::move(stabs_symbol)); + Result stabs_symbol = parse_stabs_type_symbol(symbol_string.c_str()); + CCC_RETURN_IF_ERROR(stabs_symbol); + (*stabs_symbol).raw = &symbol; + output.emplace_back(std::move(*stabs_symbol)); } } else { - verify(prefix.empty(), "Invalid STABS continuation."); + CCC_CHECK(prefix.empty(), "Invalid STABS continuation."); if(symbol.code == mdebug::N_FUN) { ParsedSymbol& func_end = output.emplace_back(); func_end.type = ParsedSymbolType::FUNCTION_END; @@ -72,24 +73,38 @@ std::vector parse_symbols(const std::vector& input break; } case 
mdebug::STAB: - case mdebug::N_OPT: { + case mdebug::N_OPT: + case mdebug::N_BINCL: { break; } - case mdebug::N_FNAME: case mdebug::N_MAIN: - case mdebug::N_PC: case mdebug::N_NSYMS: - case mdebug::N_NOMAP: case mdebug::N_OBJ: - case mdebug::N_M2C: case mdebug::N_SLINE: - case mdebug::N_DSLINE: case mdebug::N_BSLINE: - case mdebug::N_EFD: case mdebug::N_EHDECL: - case mdebug::N_CATCH: case mdebug::N_SSYM: - case mdebug::N_BINCL: case mdebug::N_EINCL: - case mdebug::N_ENTRY: case mdebug::N_EXCL: - case mdebug::N_SCOPE: case mdebug::N_BCOMM: - case mdebug::N_ECOMM: case mdebug::N_ECOML: - case mdebug::N_NBTEXT: case mdebug::N_NBDATA: - case mdebug::N_NBBSS: case mdebug::N_NBSTS: - case mdebug::N_NBLCS: case mdebug::N_LENG: { - warn("Unhandled N_%s symbol: %s", mdebug::stabs_code(symbol.code), symbol.string); + case mdebug::N_FNAME: + case mdebug::N_MAIN: + case mdebug::N_PC: + case mdebug::N_NSYMS: + case mdebug::N_NOMAP: + case mdebug::N_OBJ: + case mdebug::N_M2C: + case mdebug::N_SLINE: + case mdebug::N_DSLINE: + case mdebug::N_BSLINE: + case mdebug::N_EFD: + case mdebug::N_EHDECL: + case mdebug::N_CATCH: + case mdebug::N_SSYM: + case mdebug::N_EINCL: + case mdebug::N_ENTRY: + case mdebug::N_EXCL: + case mdebug::N_SCOPE: + case mdebug::N_BCOMM: + case mdebug::N_ECOMM: + case mdebug::N_ECOML: + case mdebug::N_NBTEXT: + case mdebug::N_NBDATA: + case mdebug::N_NBBSS: + case mdebug::N_NBSTS: + case mdebug::N_NBLCS: + case mdebug::N_LENG: { + CCC_WARN("Unhandled N_%s symbol: %s", mdebug::stabs_code(symbol.code), symbol.string); break; } } @@ -102,25 +117,36 @@ std::vector parse_symbols(const std::vector& input return output; } -ParsedSymbol parse_stabs_type_symbol(const char* input) { +Result parse_stabs_type_symbol(const char* input) { SYMBOLS_DEBUG_PRINTF("PARSING %s\n", input); ParsedSymbol symbol; symbol.type = ParsedSymbolType::NAME_COLON_TYPE; - symbol.name_colon_type.name = eat_dodgy_stabs_identifier(input); - expect_char(input, ':', "identifier"); - 
verify(*input != '\0', ERR_END_OF_SYMBOL); + + std::optional name = eat_dodgy_stabs_identifier(input); + CCC_CHECK(name.has_value(), "Cannot parse stabs symbol name."); + symbol.name_colon_type.name = *name; + + CCC_EXPECT_CHAR(input, ':', "identifier"); + CCC_CHECK(*input != '\0', "Unexpected end of input."); if((*input >= '0' && *input <= '9') || *input == '(') { symbol.name_colon_type.descriptor = StabsSymbolDescriptor::LOCAL_VARIABLE; } else { - symbol.name_colon_type.descriptor = (StabsSymbolDescriptor) eat_char(input); + std::optional symbol_descriptor = eat_char(input); + CCC_CHECK(symbol_descriptor.has_value(), "Cannot parse symbol descriptor."); + symbol.name_colon_type.descriptor = (StabsSymbolDescriptor) *symbol_descriptor; } - validate_symbol_descriptor(symbol.name_colon_type.descriptor); - verify(*input != '\0', ERR_END_OF_SYMBOL); + CCC_CHECK(validate_symbol_descriptor(symbol.name_colon_type.descriptor), + "Invalid symbol descriptor '%c'.", + (char) symbol.name_colon_type.descriptor); + CCC_CHECK(*input != '\0', "Unexpected end of input."); if(*input == 't') { input++; } - symbol.name_colon_type.type = parse_stabs_type(input); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + symbol.name_colon_type.type = std::move(*type); // This is a bit of hack to make it so variable names aren't used as type // names e.g.the STABS symbol "somevar:P123=*456" may be referenced by the // type number 123, but the type name is not "somevar". 
@@ -134,7 +160,8 @@ ParsedSymbol parse_stabs_type_symbol(const char* input) { return symbol; } -static void validate_symbol_descriptor(StabsSymbolDescriptor descriptor) { +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor) { + bool valid; switch(descriptor) { case StabsSymbolDescriptor::LOCAL_VARIABLE: case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: @@ -149,10 +176,13 @@ static void validate_symbol_descriptor(StabsSymbolDescriptor descriptor) { case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: + valid = true; break; default: - verify_not_reached("Unknown symbol descriptor: %c.", (s8) descriptor); + valid = false; + break; } + return valid; } } diff --git a/ccc/symbols.h b/ccc/symbols.h index acada0bc..9f0e6f65 100644 --- a/ccc/symbols.h +++ b/ccc/symbols.h @@ -48,8 +48,8 @@ struct ParsedSymbol { } lrbrac; }; -std::vector parse_symbols(const std::vector& input, mdebug::SourceLanguage detected_language); -ParsedSymbol parse_stabs_type_symbol(const char* input); +Result> parse_symbols(const std::vector& input, mdebug::SourceLanguage detected_language); +Result parse_stabs_type_symbol(const char* input); } diff --git a/ccc/util.cpp b/ccc/util.cpp index 566bb667..fad5433b 100644 --- a/ccc/util.cpp +++ b/ccc/util.cpp @@ -1,7 +1,6 @@ #include "ccc.h" #include -#include #include #include #include @@ -12,51 +11,47 @@ namespace ccc { -std::vector read_binary_file(const fs::path& path) { - FILE* file = open_file_rb(path.c_str()); - verify(file, "Failed to open file '%s'.", path.string().c_str()); - s64 size = file_size(file); - std::vector output(size); - verify(fread(output.data(), size, 1, file) == 1, "Failed to read file '%s'.", path.string().c_str()); - return output; -} - -std::optional read_text_file(const fs::path& path) { - std::ifstream file_stream; - file_stream.open(path.string()); - if(!file_stream.is_open()) { - return std::nullopt; 
+Error* format_error(const char* source_file, int source_line, const char* format, ...) { + va_list args; + va_start(args, format); + + static char message[4096]; + if(vsnprintf(message, sizeof(message), format, args) < 0) { + strncpy(message, "Failed to generate error message.", sizeof(message)); } - std::stringstream string_stream; - string_stream << file_stream.rdbuf(); - return string_stream.str(); + + // Copy it just in case one of the variadic arguments is a pointer to the + // last error message. + static char message_copy[4096]; + strncpy(message_copy, message, sizeof(message_copy)); + message_copy[sizeof(message_copy) - 1] = '\0'; + + static Error error = {}; + error.message = message_copy; + error.source_file = source_file; + error.source_line = source_line; + + va_end(args); + return &error; } -s64 file_size(FILE* file) { - s64 pos = ftell(file); - fseek(file, 0, SEEK_END); - s64 size = ftell(file); - fseek(file, pos, SEEK_SET); - return size; +void print_error(FILE* out, const Error* error) { + fprintf(out, "[%s:%d] " CCC_ANSI_COLOUR_RED "error:" CCC_ANSI_COLOUR_OFF " %s\n", + error->source_file, error->source_line, error->message); } -std::string get_string(const std::vector& bytes, u64 offset) { - verify(offset < bytes.size(), "Tried to read a string past the end of the buffer."); - std::string result; - for(u64 i = offset; i < bytes.size() && bytes[i] != '\0'; i++) { - result += bytes[i]; - } - return result; +void print_warning(FILE* out, const Error* warning) { + fprintf(out, "[%s:%d] " CCC_ANSI_COLOUR_MAGENTA "warning:" CCC_ANSI_COLOUR_OFF " %s\n", + warning->source_file, warning->source_line, warning->message); } -const char* get_c_string(const std::vector& bytes, u64 offset) { - verify(offset < bytes.size(), "Tried to read a string past the end of the buffer."); +Result get_string(const std::vector& bytes, u64 offset) { for(const unsigned char* c = bytes.data() + offset; c < bytes.data() + bytes.size(); c++) { if(*c == '\0') { return (const 
char*) &bytes[offset]; } } - verify_not_reached("Unexpected end of buffer while reading string."); + return CCC_FAILURE("Unexpected end of buffer while reading string."); } std::string string_format(const char* format, va_list args) { diff --git a/ccc/util.h b/ccc/util.h index 6dc5d122..30194616 100644 --- a/ccc/util.h +++ b/ccc/util.h @@ -4,26 +4,19 @@ #include #include #include -#include #include +#include #include +#include #include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include #include namespace ccc { -namespace fs = std::filesystem; - using u8 = unsigned char; using u16 = uint16_t; using u32 = uint32_t; @@ -34,70 +27,162 @@ using s16 = int16_t; using s32 = int32_t; using s64 = int64_t; -#define ANSI_COLOUR_OFF "\033[0m" -#define ANSI_COLOUR_RED "\033[31m" -#define ANSI_COLOUR_MAGENTA "\033[35m" -#define ANSI_COLOUR_GRAY "\033[90m" +#define CCC_ANSI_COLOUR_OFF "\033[0m" +#define CCC_ANSI_COLOUR_RED "\033[31m" +#define CCC_ANSI_COLOUR_MAGENTA "\033[35m" +#define CCC_ANSI_COLOUR_GRAY "\033[90m" -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat-security" -template -void verify_impl(const char* file, int line, bool condition, const char* format, Args... args) { - if(!condition) { - fprintf(stderr, "[%s:%d] " ANSI_COLOUR_RED "error:" ANSI_COLOUR_OFF " ", file, line); - fprintf(stderr, format, args...); - fprintf(stderr, "\n"); - exit(1); +struct Error { + const char* message; + const char* source_file; + s32 source_line; +}; + +Error* format_error(const char* source_file, int source_line, const char* format, ...); +void print_error(FILE* out, const Error* error); +void print_warning(FILE* out, const Error* warning); + +#define CCC_FATAL(...) \ + { \ + Error* error = format_error(__FILE__, __LINE__, __VA_ARGS__); \ + print_error(stderr, error); \ + exit(1); \ } -} -#define verify(condition, ...) 
\ - ccc::verify_impl(__FILE__, __LINE__, condition, __VA_ARGS__) -template -[[noreturn]] void verify_not_reached_impl(const char* file, int line, const char* format, Args... args) { - fprintf(stderr, "[%s:%d] " ANSI_COLOUR_RED "error:" ANSI_COLOUR_OFF " ", file, line); - fprintf(stderr, format, args...); - fprintf(stderr, "\n"); - exit(1); -} -#define verify_not_reached(...) \ - ccc::verify_not_reached_impl(__FILE__, __LINE__, __VA_ARGS__) + +#define CCC_CHECK_FATAL(condition, ...) \ + if(!(condition)) { \ + Error* error = format_error(__FILE__, __LINE__, __VA_ARGS__); \ + print_error(stderr, error); \ + exit(1); \ + } + +#define CCC_ASSERT(condition) \ + CCC_CHECK_FATAL(condition, #condition) + +// The main error handling construct in CCC. This class is used to bundle +// together a return value and a pointer to error information, so that errors +// can be propagated up the stack. +template +class Result { + template + friend class Result; +protected: + Value m_value; + Error* m_error; + + Result() {} + +public: + Result(Value value) : m_value(std::move(value)), m_error(nullptr) {} + + // Used to propagate errors up the call stack. 
+ template + Result(const Result& rhs) { + CCC_ASSERT(rhs.m_error != nullptr); + m_error = rhs.m_error; + } + + static Result success(Value value) { + Result result; + result.m_value = std::move(value); + return result; + } + + static Result failure(Error* error) { + Result result; + result.m_error = error; + return result; + } + + bool success() const { + return m_error == nullptr; + } + + const Error& error() const { + CCC_ASSERT(m_error != nullptr); + return *m_error; + } + + Value& operator*() { + CCC_ASSERT(m_error == nullptr); + return m_value; + } + + const Value& operator*() const { + CCC_ASSERT(m_error == nullptr); + return m_value; + } + + Value* operator->() { + CCC_ASSERT(m_error == nullptr); + return &m_value; + } + + const Value* operator->() const { + CCC_ASSERT(m_error == nullptr); + return &m_value; + } +}; + +template <> +class Result : public Result { +public: + Result() : Result(0) {} + + template + Result(const Result& rhs) { + CCC_ASSERT(rhs.m_error != nullptr); + m_error = rhs.m_error; + } +}; + +struct ResultDummyValue {}; +#define CCC_FAILURE(...) Result::failure(format_error(__FILE__, __LINE__, __VA_ARGS__)) +#define CCC_RETURN_IF_ERROR(result) if(!(result).success()) return (result); +#define CCC_EXIT_IF_ERROR(result) \ + if(!(result).success()) { \ + ccc::print_error(stderr, &(result).error()); \ + exit(1); \ + } + +#define CCC_CHECK(condition, ...) \ + if(!(condition)) { \ + return CCC_FAILURE(__VA_ARGS__); \ + } + +#define CCC_EXPECT_CHAR(input, c, context) \ + CCC_CHECK(*(input++) == c, \ + "Expected '%c' in %s, got '%c' (%02hhx)", \ + c, context, *(input - 1), *(input - 1)) + template -void warn_impl(const char* file, int line, const char* format, Args... args) { - fprintf(stderr, "[%s:%d] " ANSI_COLOUR_MAGENTA "warning:" ANSI_COLOUR_OFF " ", file, line); - fprintf(stderr, format, args...); - fprintf(stderr, "\n"); +void warn_impl(const char* source_file, int source_line, const char* format, Args... 
// Reinterpret the bytes starting at `offset` in `bytes` as a T.
//
// bytes: The buffer to read from.
// offset: Index of the first byte of the object.
//
// Returns a pointer into the storage of `bytes`, or nullptr if fewer than
// sizeof(T) bytes are available at `offset`.
template <typename T>
const T* get_packed(const std::vector<u8>& bytes, u64 offset) {
    // Compare against the space that is left rather than computing
    // offset + sizeof(T): that sum wraps around for very large offsets and
    // would let them through.
    if(offset > bytes.size() || bytes.size() - offset < sizeof(T)) {
        return nullptr;
    }
    return reinterpret_cast<const T*>(&bytes[offset]);
}
-#ifdef _MSC_VER - #define open_file_rb(filename) _wfopen(filename, L"rb") - #define open_file_w(filename) _wfopen(filename, L"w") - #define fseek _fseeki64 - #define ftell _ftelli64 -#else - #define open_file_rb(filename) fopen(filename, "rb") - #define open_file_w(filename) fopen(filename, "w") -#endif - } #endif diff --git a/demangle.cpp b/demangle.cpp index 817a1963..59d3c459 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -9,7 +9,7 @@ const char* git_tag(); int main(int argc, char** argv) { if(argc == 2 && !(strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) { const char* demangled = cplus_demangle(argv[1], DMGL_NO_OPTS); - verify(demangled, "Cannot demangle input!"); + CCC_CHECK_FATAL(demangled, "Cannot demangle input!"); printf("%s", cplus_demangle(argv[1], DMGL_NO_OPTS)); return 0; } else { diff --git a/objdump.cpp b/objdump.cpp index ac7b771b..d6f13e73 100644 --- a/objdump.cpp +++ b/objdump.cpp @@ -1,23 +1,35 @@ #include "ccc/ccc.h" +#include "platform/file.h" using namespace ccc; int main(int argc, char** argv) { - verify(argc == 2, "Incorrect number of arguments."); + CCC_CHECK_FATAL(argc == 2, "Incorrect number of arguments."); + + Module mod; + + fs::path input_path(argv[1]); + std::optional> binary = platform::read_binary_file(input_path); + CCC_CHECK_FATAL(binary.has_value(), "Failed to open file '%s'.", input_path.string().c_str()); + mod.image = std::move(*binary); + + Result result = parse_elf_file(mod); + CCC_EXIT_IF_ERROR(result); - Module mod = loaders::read_elf_file(fs::path(argv[1])); std::vector modules{&mod}; ModuleSection* text = mod.lookup_section(".text"); - verify(text, "ELF contains no .text section!"); + CCC_CHECK_FATAL(text, "ELF contains no .text section!"); + + std::optional text_address = mod.file_offset_to_virtual_address(text->file_offset); + CCC_CHECK_FATAL(text_address.has_value(), "Failed to translate file offset to virtual address."); - u32 text_address = 
namespace platform {

// Determine the size in bytes of an open file.
//
// The current position of the stream is saved and restored, so the caller can
// keep reading from where it was. Returns -1 if the position cannot be queried
// or changed.
int64_t file_size(FILE* file) {
    int64_t pos = ftell(file);
    if(pos < 0 || fseek(file, 0, SEEK_END) != 0) {
        return -1;
    }
    int64_t size = ftell(file);
    if(fseek(file, pos, SEEK_SET) != 0) {
        return -1;
    }
    return size;
}

// Read the entire contents of the file at `path` into memory.
//
// Returns the bytes of the file, or std::nullopt if the file cannot be opened,
// its size cannot be determined or it cannot be read in full. An empty file
// yields an empty vector rather than an error.
//
// NOTE(review): the element type of the vector is not spelled out in the text
// this was reviewed from; uint8_t is assumed - confirm against file.h.
std::optional<std::vector<uint8_t>> read_binary_file(const fs::path& path) {
    FILE* file = fopen(path.string().c_str(), "rb");
    if(file == nullptr) {
        return std::nullopt;
    }
    int64_t size = file_size(file);
    if(size < 0) {
        fclose(file);
        return std::nullopt;
    }
    std::vector<uint8_t> output(static_cast<size_t>(size));
    // fread reports zero items for a zero-sized item, so skip it for empty
    // files instead of treating that as a failure.
    bool read_ok = output.empty() || fread(output.data(), output.size(), 1, file) == 1;
    // The handle is only needed for the read above; release it on every path.
    fclose(file);
    if(!read_ok) {
        return std::nullopt;
    }
    return output;
}

// Read the entire contents of the file at `path` as text.
//
// Returns std::nullopt if the file cannot be opened.
std::optional<std::string> read_text_file(const fs::path& path) {
    std::ifstream file_stream;
    file_stream.open(path);
    if(!file_stream.is_open()) {
        return std::nullopt;
    }
    std::stringstream string_stream;
    string_stream << file_stream.rdbuf();
    return string_stream.str();
}

}
+std::optional> read_binary_file(const fs::path& path); +std::optional read_text_file(const fs::path& path); +int64_t file_size(FILE* file); + +// On Windows long is only 4 bytes, so the regular libc standard IO functions +// are crippled, hence we need to use these special versions instead. +#ifdef _MSC_VER + #define fseek _fseeki64 + #define ftell _ftelli64 +#endif + +} + +#endif diff --git a/stdump.cpp b/stdump.cpp index 8b8ffbf1..d971bd90 100644 --- a/stdump.cpp +++ b/stdump.cpp @@ -1,4 +1,5 @@ #include "ccc/ccc.h" +#include "platform/file.h" #include @@ -36,6 +37,7 @@ struct Options { u32 flags = NO_FLAGS; }; +static Result read_symbol_table(Module& mod, const fs::path& input_file); static void print_deduplicated(const mdebug::SymbolTable& symbol_table, Options options); static std::vector> build_deduplicated_ast(std::vector>& symbols, const mdebug::SymbolTable& symbol_table); static void print_functions(FILE* out, mdebug::SymbolTable& symbol_table); @@ -56,79 +58,104 @@ int main(int argc, char** argv) { Options options = parse_args(argc, argv); FILE* out = stdout; if(!options.output_file.empty()) { - out = open_file_w(options.output_file.c_str()); - verify(out, "Failed to open output file '%s'.", options.output_file.string().c_str()); + out = fopen(options.output_file.string().c_str(), "w"); + CCC_CHECK_FATAL(out, "Failed to open output file '%s'.", options.output_file.string().c_str()); } switch(options.mode) { case OutputMode::FUNCTIONS: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - print_functions(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + print_functions(out, *symbol_table); return 0; } case OutputMode::GLOBALS: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - print_globals(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + 
CCC_EXIT_IF_ERROR(symbol_table); + + print_globals(out, *symbol_table); return 0; } case OutputMode::TYPES: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + if(!(options.flags & FLAG_PER_FILE)) { - print_types_deduplicated(out, symbol_table, options); + print_types_deduplicated(out, *symbol_table, options); } else { - print_types_per_file(out, symbol_table, options); + print_types_per_file(out, *symbol_table, options); } return 0; } case OutputMode::JSON: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + u32 analysis_flags = STRIP_GENERATED_FUNCTIONS; if(!(options.flags & FLAG_PER_FILE)) { analysis_flags |= DEDUPLICATE_TYPES; } - HighSymbolTable high = analyse(symbol_table, analysis_flags); - print_json(out, high, options.flags & FLAG_PER_FILE); + Result high = analyse(*symbol_table, analysis_flags); + CCC_EXIT_IF_ERROR(high); + + print_json(out, *high, options.flags & FLAG_PER_FILE); + break; } case OutputMode::MDEBUG: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - mdebug::print_headers(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + mdebug::print_headers(out, *symbol_table); break; } case OutputMode::SYMBOLS: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - print_local_symbols(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + print_local_symbols(out, *symbol_table); return 0; } case OutputMode::EXTERNALS: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - 
print_external_symbols(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + print_external_symbols(out, *symbol_table); return 0; } case OutputMode::FILES: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - list_files(out, symbol_table); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + list_files(out, *symbol_table); return 0; } case OutputMode::SECTIONS: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - list_sections(out, symbol_table, mod); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + list_sections(out, *symbol_table, mod); return 0; } case OutputMode::TYPE_GRAPH: { Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, options.input_file); - HighSymbolTable high = analyse(symbol_table, DEDUPLICATE_TYPES | STRIP_GENERATED_FUNCTIONS); - TypeDependencyAdjacencyList graph = build_type_dependency_graph(high); - print_type_dependency_graph(out, high, graph); + Result symbol_table = read_symbol_table(mod, options.input_file); + CCC_EXIT_IF_ERROR(symbol_table); + + Result high = analyse(*symbol_table, DEDUPLICATE_TYPES | STRIP_GENERATED_FUNCTIONS); + CCC_EXIT_IF_ERROR(high); + + TypeDependencyAdjacencyList graph = build_type_dependency_graph(*high); + print_type_dependency_graph(out, *high, graph); return 0; } case OutputMode::TEST: { @@ -143,11 +170,25 @@ int main(int argc, char** argv) { } } +static Result read_symbol_table(Module& mod, const fs::path& input_file) { + std::optional> binary = platform::read_binary_file(input_file); + CCC_CHECK(binary.has_value(), "Failed to open file '%s'.", input_file.string().c_str()); + mod.image = std::move(*binary); + Result result = parse_elf_file(mod); + CCC_RETURN_IF_ERROR(result); + + ModuleSection* mdebug_section = 
mod.lookup_section(".mdebug"); + CCC_CHECK(mdebug_section != nullptr, "No .mdebug section."); + + return mdebug::parse_symbol_table(mod.image, mdebug_section->file_offset); +} + static void print_functions(FILE* out, mdebug::SymbolTable& symbol_table) { CppPrinter printer(out); for(s32 i = 0; i < (s32) symbol_table.files.size(); i++) { - HighSymbolTable result = analyse(symbol_table, NO_ANALYSIS_FLAGS, i); - ast::SourceFile& source_file = *result.source_files.at(0); + Result result = analyse(symbol_table, NO_ANALYSIS_FLAGS, i); + CCC_EXIT_IF_ERROR(result); + ast::SourceFile& source_file = *result->source_files.at(0); printer.comment_block_file(source_file.full_path.c_str()); for(const std::unique_ptr& node : source_file.functions) { printer.function(node->as()); @@ -158,8 +199,9 @@ static void print_functions(FILE* out, mdebug::SymbolTable& symbol_table) { static void print_globals(FILE* out, mdebug::SymbolTable& symbol_table) { CppPrinter printer(out); for(s32 i = 0; i < (s32) symbol_table.files.size(); i++) { - HighSymbolTable result = analyse(symbol_table, NO_ANALYSIS_FLAGS, i); - ast::SourceFile& source_file = *result.source_files.at(0); + Result result = analyse(symbol_table, NO_ANALYSIS_FLAGS, i); + CCC_EXIT_IF_ERROR(result); + ast::SourceFile& source_file = *result->source_files.at(0); printer.comment_block_file(source_file.full_path.c_str()); for(const std::unique_ptr& node : source_file.globals) { printer.global_variable(node->as()); @@ -170,13 +212,14 @@ static void print_globals(FILE* out, mdebug::SymbolTable& symbol_table) { static void print_types_deduplicated(FILE* out, mdebug::SymbolTable& symbol_table, const Options& options) { u32 analysis_flags = build_analysis_flags(options.flags); analysis_flags |= DEDUPLICATE_TYPES; - HighSymbolTable high = analyse(symbol_table, analysis_flags); + Result high = analyse(symbol_table, analysis_flags); + CCC_EXIT_IF_ERROR(high); CppPrinter printer(out); - printer.comment_block_beginning(options.input_file); + 
printer.comment_block_beginning(options.input_file.filename().string().c_str()); printer.comment_block_compiler_version_info(symbol_table); - printer.comment_block_builtin_types(high.deduplicated_types); + printer.comment_block_builtin_types((*high).deduplicated_types); printer.verbose = options.flags & FLAG_VERBOSE; - for(const std::unique_ptr& type : high.deduplicated_types) { + for(const std::unique_ptr& type : high->deduplicated_types) { printer.data_type(*type); } } @@ -185,10 +228,11 @@ static void print_types_per_file(FILE* out, mdebug::SymbolTable& symbol_table, c u32 analysis_flags = build_analysis_flags(options.flags); CppPrinter printer(out); printer.verbose = options.flags & FLAG_VERBOSE; - printer.comment_block_beginning(options.input_file); + printer.comment_block_beginning(options.input_file.filename().string().c_str()); for(s32 i = 0; i < (s32) symbol_table.files.size(); i++) { - HighSymbolTable result = analyse(symbol_table, analysis_flags, i); - ast::SourceFile& source_file = *result.source_files.at(0); + Result result = analyse(symbol_table, analysis_flags, i); + CCC_EXIT_IF_ERROR(result); + ast::SourceFile& source_file = *result->source_files.at(0); printer.comment_block_file(source_file.full_path.c_str()); printer.comment_block_compiler_version_info(symbol_table); printer.comment_block_builtin_types(source_file.data_types); @@ -289,23 +333,35 @@ static void list_sections(FILE* out, const mdebug::SymbolTable& symbol_table, co } static void test(FILE* out, const fs::path& directory) { - verify(fs::is_directory(directory), "Input path is not a directory."); + CCC_CHECK_FATAL(fs::is_directory(directory), "Input path is not a directory."); s32 passed = 0; s32 skipped = 0; for(auto entry : fs::directory_iterator(directory)) { fs::path filename = entry.path().filename(); if(filename.extension() != ".c" && filename.extension() != ".cpp" && filename.extension() != ".md") { printf("%s ", entry.path().filename().string().c_str()); - Module mod = 
loaders::read_elf_file(entry.path()); + + std::optional> binary = platform::read_binary_file(entry.path()); + CCC_CHECK_FATAL(binary.has_value(), "Failed to open file '%s'.", entry.path().string().c_str()); + + Module mod; + mod.image = std::move(*binary); + Result result = parse_elf_file(mod); + CCC_EXIT_IF_ERROR(result); + ModuleSection* mdebug_section = mod.lookup_section(".mdebug"); if(mdebug_section) { - mdebug::SymbolTable symbol_table = mdebug::parse_symbol_table(mod, *mdebug_section); - ccc::HighSymbolTable high = analyse(symbol_table, DEDUPLICATE_TYPES); + Result symbol_table = mdebug::parse_symbol_table(mod.image, mdebug_section->file_offset); + CCC_EXIT_IF_ERROR(symbol_table); + + Result high = analyse(*symbol_table, DEDUPLICATE_TYPES); + CCC_EXIT_IF_ERROR(high); + CppPrinter printer(out); - for(const std::unique_ptr& type : high.deduplicated_types) { + for(const std::unique_ptr& type : high->deduplicated_types) { printer.data_type(*type); } - for(const std::unique_ptr& file : high.source_files) { + for(const std::unique_ptr& file : high->source_files) { for(const std::unique_ptr& node : file->functions) { printer.function(node->as()); } @@ -313,7 +369,7 @@ static void test(FILE* out, const fs::path& directory) { printer.global_variable(node->as()); } } - print_json(out, high, false); + print_json(out, *high, false); printf("pass\n"); passed++; } else { @@ -365,7 +421,7 @@ static Options parse_args(int argc, char** argv) { options.mode = OutputMode::HELP; require_input_path = false; } else { - verify_not_reached("Unknown command '%s'.", command); + CCC_FATAL("Unknown command '%s'.", command); options.mode = OutputMode::BAD_COMMAND; return options; } @@ -387,24 +443,20 @@ static Options parse_args(int argc, char** argv) { if(i + 1 < argc) { options.output_file = argv[++i]; } else { - verify_not_reached("No output path specified."); + CCC_FATAL("No output path specified."); options.mode = OutputMode::BAD_COMMAND; return options; } } else 
if(strncmp(arg, "--", 2) == 0) { - verify_not_reached("Unknown option '%s'.", arg); - options.mode = OutputMode::BAD_COMMAND; - return options; + CCC_FATAL("Unknown option '%s'.", arg); } else if(input_path_provided) { - verify_not_reached("Multiple input paths specified."); - options.mode = OutputMode::BAD_COMMAND; - return options; + CCC_FATAL("Multiple input paths specified."); } else { options.input_file = argv[i]; input_path_provided = true; } } - verify(!require_input_path || !options.input_file.empty(), "No input path specified."); + CCC_CHECK_FATAL(!require_input_path || !options.input_file.empty(), "No input path specified."); return options; } diff --git a/uncc.cpp b/uncc.cpp index 5a53de0c..2a306ffa 100644 --- a/uncc.cpp +++ b/uncc.cpp @@ -1,4 +1,5 @@ #include "ccc/ccc.h" +#include "platform/file.h" #define HAVE_DECL_BASENAME 1 #include "demanglegnu/demangle.h" @@ -31,7 +32,7 @@ int main(int argc, char** argv) { fs::path elf_path = std::string(argv[1]); fs::path output_path = std::string(argv[2]); - verify(fs::is_directory(output_path), "Output path needs to be a directory!"); + CCC_CHECK_FATAL(fs::is_directory(output_path), "Output path needs to be a directory!"); fs::path sources_file_path = output_path/"SOURCES.txt"; fs::path functions_file_path = output_path/"FUNCTIONS.txt"; @@ -42,33 +43,46 @@ int main(int argc, char** argv) { } Module mod; - mdebug::SymbolTable symbol_table = read_symbol_table(mod, elf_path); - HighSymbolTable high = analyse(symbol_table, DEDUPLICATE_TYPES | STRIP_GENERATED_FUNCTIONS); - map_types_to_files_based_on_this_pointers(high); - map_types_to_files_based_on_reference_count(high); - demangle_all(high); + std::optional> binary = platform::read_binary_file(elf_path); + CCC_CHECK_FATAL(binary.has_value(), "Failed to open file '%s'.", elf_path.string().c_str()); + mod.image = std::move(*binary); + + Result result = parse_elf_file(mod); + CCC_EXIT_IF_ERROR(result); + + ModuleSection* mdebug_section = 
mod.lookup_section(".mdebug"); + CCC_CHECK_FATAL(mdebug_section != nullptr, "No .mdebug section."); + + Result symbol_table = mdebug::parse_symbol_table(mod.image, mdebug_section->file_offset); + CCC_EXIT_IF_ERROR(symbol_table); + + Result high = analyse(*symbol_table, DEDUPLICATE_TYPES | STRIP_GENERATED_FUNCTIONS); + CCC_EXIT_IF_ERROR(high); + map_types_to_files_based_on_this_pointers(*high); + map_types_to_files_based_on_reference_count(*high); + demangle_all(*high); // Fish out the values of global variables (and static locals). std::vector modules{&mod}; - refine_variables(high, modules); + refine_variables(*high, modules); - fill_in_pointers_to_member_function_definitions(high); + fill_in_pointers_to_member_function_definitions(*high); // Group duplicate source file entries, filter out files not referenced in // the SOURCES.txt file. std::map> path_to_source_file; - for(size_t path_index = 0, source_index = 0; path_index < source_paths.size() && source_index < high.source_files.size(); path_index++, source_index++) { + for(size_t path_index = 0, source_index = 0; path_index < source_paths.size() && source_index < high->source_files.size(); path_index++, source_index++) { // Find the next file referenced in the SOURCES.txt file. std::string source_name = extract_file_name(source_paths[path_index]); - while(source_index < high.source_files.size()) { - std::string symbol_name = extract_file_name(high.source_files[source_index]->full_path); + while(source_index < high->source_files.size()) { + std::string symbol_name = extract_file_name(high->source_files[source_index]->full_path); if(symbol_name == source_name) { break; } printf("Skipping %s (not referenced, expected %s next)\n", symbol_name.c_str(), source_name.c_str()); source_index++; } - if(source_index >= high.source_files.size()) { + if(source_index >= high->source_files.size()) { break; } // Add the file. 
@@ -87,15 +101,15 @@ int main(int argc, char** argv) { if(path.extension() == ".c" || path.extension() == ".cpp") { // Write .c/.cpp file. if(should_overwrite_file(path)) { - write_c_cpp_file(path, relative_header_path, high, sources, functions_file); + write_c_cpp_file(path, relative_header_path, *high, sources, functions_file); } else { - printf(ANSI_COLOUR_GRAY "Skipping " ANSI_COLOUR_OFF " %s\n", path.string().c_str()); + printf(CCC_ANSI_COLOUR_GRAY "Skipping " CCC_ANSI_COLOUR_OFF " %s\n", path.string().c_str()); } // Write .h file. if(should_overwrite_file(header_path)) { - write_h_file(header_path, relative_header_path.string(), high, sources); + write_h_file(header_path, relative_header_path.string(), *high, sources); } else { - printf(ANSI_COLOUR_GRAY "Skipping " ANSI_COLOUR_OFF " %s\n", header_path.string().c_str()); + printf(CCC_ANSI_COLOUR_GRAY "Skipping " CCC_ANSI_COLOUR_OFF " %s\n", header_path.string().c_str()); } } else { printf("Skipping assembly file %s\n", path.string().c_str()); @@ -104,14 +118,14 @@ int main(int argc, char** argv) { // Write out a lost+found file for types that can't be mapped to a specific // source file if we need it. 
- if(needs_lost_and_found_file(high)) { - write_lost_and_found_file(output_path/"lost+found.h", high); + if(needs_lost_and_found_file(*high)) { + write_lost_and_found_file(output_path/"lost+found.h", *high); } } static std::vector parse_sources_file(const fs::path& path) { - std::optional file = read_text_file(path); - verify(file.has_value(), "Failed to open file '%s'", path.string().c_str()); + std::optional file = platform::read_text_file(path); + CCC_CHECK_FATAL(file.has_value(), "Failed to open file '%s'", path.string().c_str()); std::string_view input(*file); std::vector sources; while(skip_whitespace(input), input.size() > 0) { @@ -123,8 +137,8 @@ static std::vector parse_sources_file(const fs::path& path) { static FunctionsFile parse_functions_file(const fs::path& path) { FunctionsFile result; - std::optional file = read_text_file(path); - verify(file.has_value(), "Failed to open file '%s'", path.string().c_str()); + std::optional file = platform::read_text_file(path); + CCC_CHECK_FATAL(file.has_value(), "Failed to open file '%s'", path.string().c_str()); result.contents = std::move(*file); // Parse the file. 
@@ -132,10 +146,10 @@ static FunctionsFile parse_functions_file(const fs::path& path) { std::span* function = nullptr; for(std::span line = eat_line(input); line.data() != nullptr; line = eat_line(input)) { if(line.size() >= 9 && memcmp(line.data(), "@function", 9) == 0) { - verify(line.size() > 10, "Bad @function directive in FUNCTIONS.txt file."); + CCC_CHECK_FATAL(line.size() > 10, "Bad @function directive in FUNCTIONS.txt file."); char* end = nullptr; s32 address = (s32) strtol(line.data() + 10, &end, 16); - verify(end != line.data() + 10, "Bad @function directive in FUNCTIONS.txt file."); + CCC_CHECK_FATAL(end != line.data() + 10, "Bad @function directive in FUNCTIONS.txt file."); function = &result.functions[address]; *function = input.subspan(1); } else if(function) { @@ -193,7 +207,7 @@ static void skip_whitespace(std::string_view& input) { } static bool should_overwrite_file(const fs::path& path) { - std::optional file = read_text_file(path); + std::optional file = platform::read_text_file(path); return !file || file->empty() || file->starts_with("// STATUS: NOT STARTED"); } @@ -222,8 +236,8 @@ static void demangle_all(HighSymbolTable& high) { static void write_c_cpp_file(const fs::path& path, const fs::path& header_path, const HighSymbolTable& high, const std::vector& file_indices, const FunctionsFile& functions_file) { printf("Writing %s\n", path.string().c_str()); - FILE* out = open_file_w(path.c_str()); - verify(out, "Failed to open '%s' for writing.", path.string().c_str()); + FILE* out = fopen(path.string().c_str(), "w"); + CCC_CHECK_FATAL(out, "Failed to open '%s' for writing.", path.string().c_str()); fprintf(out, "// STATUS: NOT STARTED\n\n"); // Configure printing. 
@@ -241,7 +255,7 @@ static void write_c_cpp_file(const fs::path& path, const fs::path& header_path, for(s32 file_index : file_indices) { const ast::SourceFile& file = *high.source_files[file_index].get(); for(const std::unique_ptr& node : high.deduplicated_types) { - assert(node); + CCC_ASSERT(node); if(node->probably_defined_in_cpp_file && node->files.size() == 1 && node->files[0] == file_index) { printer.data_type(*node); } @@ -269,7 +283,8 @@ static void write_c_cpp_file(const fs::path& path, const fs::path& header_path, static void write_h_file(const fs::path& path, std::string relative_path, const HighSymbolTable& high, const std::vector& file_indices) { printf("Writing %s\n", path.string().c_str()); - FILE* out = open_file_w(path.c_str()); + FILE* out = fopen(path.string().c_str(), "w"); + CCC_CHECK_FATAL(out, "Failed to open '%s' for writing.", path.string().c_str()); fprintf(out, "// STATUS: NOT STARTED\n\n"); // Configure printing. @@ -338,7 +353,8 @@ static bool needs_lost_and_found_file(const HighSymbolTable& high) { static void write_lost_and_found_file(const fs::path& path, const HighSymbolTable& high) { printf("Writing %s\n", path.string().c_str()); - FILE* out = open_file_w(path.c_str()); + FILE* out = fopen(path.string().c_str(), "w"); + CCC_CHECK_FATAL(out, "Failed to open '%s' for writing.", path.string().c_str()); CppPrinter printer(out); printer.print_offsets_and_sizes = false; printer.omit_this_parameter = true;