From 6f89bea11f579a735142dc79190124d945f57cd0 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Wed, 1 May 2024 12:03:05 -0700 Subject: [PATCH 01/25] wip --- CMakeLists.txt | 2 + _orc-config | 8 ++ include/orc/dwarf.hpp | 10 ++- include/orc/mach_types.hpp | 31 ------- include/orc/orc.hpp | 2 +- include/orc/parse_file.hpp | 3 +- include/orc/settings.hpp | 1 + src/dwarf.cpp | 104 +++++++++++++++++++++-- src/fat.cpp | 29 +------ src/macho.cpp | 166 ++++++++++++++++++++++++++----------- src/main.cpp | 22 +++-- src/orc.cpp | 41 ++++++++- src/parse_file.cpp | 5 +- 13 files changed, 297 insertions(+), 127 deletions(-) delete mode 100644 include/orc/mach_types.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ee4c164..a968438 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_XCODE_GENERATE_SCHEME OFF) +# This does not appear to be set via `CMAKE_CXX_STANDARD`; not sure why. +set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++20") file(GLOB SRC_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/src/*.cpp) file(GLOB HEADER_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/include/orc/*.hpp) diff --git a/_orc-config b/_orc-config index 2122283..d4b122b 100644 --- a/_orc-config +++ b/_orc-config @@ -49,6 +49,14 @@ log_level = 'warning' standalone_mode = false +# Scan the binary passed in to the command line for its Mach-O dependencies, specifically any +# dylibs it depends on (and rpaths where they might be found.) Then perform a scan over that +# collection as if it were runtime-linked. +# +# The default value is `false`. + +dylib_scan_mode = false + # If defined, ORC will log output to the specified file. (Normal stream output # is unaffected by the `output_file`.) # diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index bfea404..5a0125d 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -21,14 +21,20 @@ struct dwarf { dwarf(std::uint32_t ofd_index, freader&& s, file_details&& details, - register_dies_callback&& callback); + callbacks&& callbacks); void register_section(std::string name, std::size_t offset, std::size_t size); - void process_all_dies(); + void process_all_dies(); // assumes register die mode die_pair fetch_one_die(std::size_t debug_info_offset, std::size_t cu_address); + void register_dylib(std::string&&); + + void register_rpath(std::string&&); + + void derive_dependencies(); // assumes dylib scan mode + private: struct implementation; std::unique_ptr _impl{nullptr, diff --git a/include/orc/mach_types.hpp b/include/orc/mach_types.hpp deleted file mode 100644 index 6d24e71..0000000 --- a/include/orc/mach_types.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 Adobe -// All Rights Reserved. -// -// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms -// of the Adobe license agreement accompanying it. - -#pragma once - -/**************************************************************************************************/ - -constexpr std::uint32_t MH_MAGIC = 0xfeedface; -constexpr std::uint32_t MH_CIGAM = 0xcefaedfe; -constexpr std::uint32_t MH_MAGIC_64 = 0xfeedfacf; -constexpr std::uint32_t MH_CIGAM_64 = 0xcffaedfe; -constexpr std::uint32_t FAT_MAGIC = 0xcafebabe; -constexpr std::uint32_t FAT_CIGAM = 0xbebafeca; -constexpr std::uint32_t FAT_MAGIC_64 = 0xcafebabf; -constexpr std::uint32_t FAT_CIGAM_64 = 0xbfbafeca; - -using cpu_type_t = int; -using cpu_subtype_t = int; - -constexpr cpu_type_t CPU_ARCH_ABI64 = 0x01000000; -constexpr cpu_type_t CPU_ARCH_ABI64_32 = 0x02000000; -constexpr cpu_type_t CPU_TYPE_X86 = 7; -constexpr cpu_type_t CPU_TYPE_ARM = 12; -constexpr cpu_type_t CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64; -constexpr cpu_type_t CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64; -constexpr cpu_type_t CPU_TYPE_ARM64_32 = CPU_TYPE_ARM | CPU_ARCH_ABI64_32; - -/**************************************************************************************************/ diff --git a/include/orc/orc.hpp b/include/orc/orc.hpp index e7b673c..900c768 100644 --- a/include/orc/orc.hpp +++ b/include/orc/orc.hpp @@ -46,7 +46,7 @@ bool filter_report(const odrv_report& report); /**************************************************************************************************/ -std::vector orc_process(const std::vector&); +std::vector orc_process(std::vector&&); void orc_reset(); diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index 02df050..882507a 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -228,11 +228,12 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ using register_dies_callback = std::function; using do_work_callback = std::function)>; -using empool_callback = std::function; +using derived_dependency_callback = std::function&&)>; struct callbacks { register_dies_callback _register_die; do_work_callback _do_work; + derived_dependency_callback _derived_dependency; }; void parse_file(std::string_view object_name, diff --git a/include/orc/settings.hpp b/include/orc/settings.hpp index 804980a..8bd74b6 100644 --- a/include/orc/settings.hpp +++ b/include/orc/settings.hpp @@ -33,6 +33,7 @@ struct settings { bool _forward_to_linker{true}; log_level _log_level{log_level::silent}; bool _standalone_mode{false}; + bool _dylib_scan_mode{false}; bool _print_object_file_list{false}; std::vector _symbol_ignore; std::vector _violation_report; diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 7af00da..0fc4740 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -393,6 +393,36 @@ enum class process_mode { /**************************************************************************************************/ +std::filesystem::path resolve_dylib(std::string raw_path, + const std::filesystem::path& executable_path, + const std::filesystem::path& loader_path, + const std::vector& rpaths) { + constexpr std::string_view executable_path_k = "@executable_path"; + constexpr std::string_view loader_path_k = "@loader_path"; + constexpr std::string_view rpath_k = "@rpath"; + + if (raw_path.starts_with(executable_path_k)) { + raw_path.replace(0, executable_path_k.size(), executable_path.string()); + } else if (raw_path.starts_with(loader_path_k)) { + raw_path.replace(0, loader_path_k.size(), loader_path.string()); + } else if (raw_path.starts_with(rpath_k)) { + // search rpaths until the desired dylib is actually found. + for (const auto& rpath : rpaths) { + std::string tmp = raw_path; + tmp.replace(0, rpath_k.size(), rpath); + std::filesystem::path candidate = resolve_dylib(tmp, executable_path, loader_path, rpaths); + if (exists(candidate)) { + return candidate; + } + } + throw std::runtime_error("Could not find dependent library: " + raw_path); + } + + return raw_path; +} + +/**************************************************************************************************/ + } // namespace /**************************************************************************************************/ @@ -401,8 +431,8 @@ struct dwarf::implementation { implementation(std::uint32_t ofd_index, freader&& s, file_details&& details, - register_dies_callback&& callback) - : _s(std::move(s)), _details(std::move(details)), _register_dies(std::move(callback)), + callbacks&& callbacks) + : _s(std::move(s)), _details(std::move(details)), _callbacks(std::move(callbacks)), _ofd_index(ofd_index) {} void register_section(const std::string& name, std::size_t offset, std::size_t size); @@ -457,12 +487,42 @@ struct dwarf::implementation { die_pair abbreviation_to_die(std::size_t die_address, process_mode mode); + void register_dylib(std::string&& s) { + _dylibs.emplace_back(std::move(s)); + } + + void register_rpath(std::string&& s) { + _rpaths.emplace_back(std::move(s)); + } + + void derive_dependencies() { + // See https://itwenty.me/posts/01-understanding-rpath/ + // `@executable_path` resolves to the path of the directory containing the executable. + // `@loader_path` resolves to the path of the client doing the loading. + // For executables, `@loader_path` and `@executable_path` mean the same thing. + // TODO: (fosterbrereton) We're going to have to nest this search, aren't we? + // If so, that means we'll need to track the originating file and use it as + // `executable_path`, and then `loader_path` will follow wherever the nesting goes. + + std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); + std::filesystem::path loader_path = executable_path; + std::vector resolved_dylibs; + std::transform(_dylibs.begin(), _dylibs.end(), std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib){ + return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); + }); + + // Send these back to the main engine for ODR scanning processing. + _callbacks._derived_dependency(std::move(resolved_dylibs)); + } + freader _s; file_details _details; - register_dies_callback _register_dies; + callbacks _callbacks; std::vector _abbreviations; std::vector _path; std::vector _decl_files; + std::vector _dylibs; // unresolved + std::vector _rpaths; // unresolved std::unordered_map _type_cache; std::unordered_map _debug_str_cache; cu_header _cu_header; @@ -1538,7 +1598,25 @@ void dwarf::implementation::report_die_processing_failure(std::size_t die_absolu } /**************************************************************************************************/ - +/* + Not sure where to put this, so it's going here. This is specifically in relation to the + dylib scanning mode, where we're looking at a final linked artifact that enumerates the + dylibs it depends upon. + + Debug builds on macOS do not embed symbol information into the binary by default. + Rather, there are "debug maps" that link from the artifact to the `.o` files used to + make it where the symbol information resides. At the time the application is debugged, + the debug maps are used to derive the symbols of the application by pulling them from + the relevant object files. + + Because of this funky artifact->debug map->object file relationship, ORC must also + support debug maps in order to derive and scan the symbols present in a linked artifact. + This also means the final linked binary is not sufficient for a scan; you _also_ need + its associated object files present, _and_ in the location specified by the debug map. + + Apple's "Lazy" DWARF Scheme: https://wiki.dwarfstd.org/Apple%27s_%22Lazy%22_DWARF_Scheme.md + See: https://stackoverflow.com/a/12827463/153535 +*/ void dwarf::implementation::process_all_dies() { if (!_ready && !register_sections_done()) return; assert(_ready); @@ -1644,7 +1722,7 @@ void dwarf::implementation::process_all_dies() { dies.shrink_to_fit(); - _register_dies(std::move(dies)); + _callbacks._register_die(std::move(dies)); } /**************************************************************************************************/ @@ -1726,8 +1804,8 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t debug_info_offset, dwarf::dwarf(std::uint32_t ofd_index, freader&& s, file_details&& details, - register_dies_callback&& callback) - : _impl(new implementation(ofd_index, std::move(s), std::move(details), std::move(callback)), + callbacks&& callbacks) + : _impl(new implementation(ofd_index, std::move(s), std::move(details), std::move(callbacks)), [](auto x) { delete x; }) {} void dwarf::register_section(std::string name, std::size_t offset, std::size_t size) { @@ -1740,4 +1818,16 @@ die_pair dwarf::fetch_one_die(std::size_t debug_info_offset, std::size_t cu_die_ return _impl->fetch_one_die(debug_info_offset, cu_die_address); } +void dwarf::register_dylib(std::string&& s) { + _impl->register_dylib(std::move(s)); +} + +void dwarf::register_rpath(std::string&& s) { + _impl->register_rpath(std::move(s)); +} + +void dwarf::derive_dependencies() { + _impl->derive_dependencies(); +} + /**************************************************************************************************/ diff --git a/src/fat.cpp b/src/fat.cpp index 52eb5ae..94b6aae 100644 --- a/src/fat.cpp +++ b/src/fat.cpp @@ -7,8 +7,9 @@ // identity #include "orc/fat.hpp" -// application -#include "orc/mach_types.hpp" +// mach-o +#include +#include /**************************************************************************************************/ @@ -16,30 +17,6 @@ namespace { /**************************************************************************************************/ -struct fat_header { - std::uint32_t magic{0}; - std::uint32_t nfat_arch{0}; -}; - -struct fat_arch { - cpu_type_t cputype{0}; - cpu_subtype_t cpusubtype{0}; - std::uint32_t offset{0}; - std::uint32_t size{0}; - std::uint32_t align{0}; -}; - -struct fat_arch_64 { - cpu_type_t cputype{0}; - cpu_subtype_t cpusubtype{0}; - std::uint64_t offset{0}; - std::uint64_t size{0}; - std::uint32_t align{0}; - std::uint32_t reserved{0}; -}; - -/**************************************************************************************************/ - const char* cputype_to_string(cpu_type_t cputype) { switch (cputype) { case CPU_TYPE_X86: diff --git a/src/macho.cpp b/src/macho.cpp index eb76b48..19e95f9 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -7,12 +7,14 @@ // identity #include "orc/macho.hpp" +// mach-o +#include + // tbb #include // application #include "orc/dwarf.hpp" -#include "orc/mach_types.hpp" #include "orc/object_file_registry.hpp" #include "orc/settings.hpp" #include "orc/str.hpp" @@ -23,23 +25,6 @@ namespace { /**************************************************************************************************/ -struct section_64 { - char sectname[16]{0}; - char segname[16]{0}; - std::uint64_t addr{0}; - std::uint64_t size{0}; - std::uint32_t offset{0}; - std::uint32_t align{0}; - std::uint32_t reloff{0}; - std::uint32_t nreloc{0}; - std::uint32_t flags{0}; - std::uint32_t reserved1{0}; - std::uint32_t reserved2{0}; - std::uint32_t reserved3{0}; -}; - -/**************************************************************************************************/ - void read_lc_segment_64_section(freader& s, const file_details& details, dwarf& dwarf) { auto section = read_pod(s); if (details._needs_byteswap) { @@ -65,24 +50,6 @@ void read_lc_segment_64_section(freader& s, const file_details& details, dwarf& /**************************************************************************************************/ -using vm_prot_t = int; - -struct segment_command_64 { - std::uint32_t cmd{0}; - std::uint32_t cmdsize{0}; - char segname[16]{0}; - std::uint64_t vmaddr{0}; - std::uint64_t vmsize{0}; - std::uint64_t fileoff{0}; - std::uint64_t filesize{0}; - vm_prot_t maxprot{0}; - vm_prot_t initprot{0}; - std::uint32_t nsects{0}; - std::uint32_t flags{0}; -}; - -/**************************************************************************************************/ - void read_lc_segment_64(freader& s, const file_details& details, dwarf& dwarf) { auto lc = read_pod(s); if (details._needs_byteswap) { @@ -106,6 +73,83 @@ void read_lc_segment_64(freader& s, const file_details& details, dwarf& dwarf) { /**************************************************************************************************/ +void read_lc_load_dylib(freader& s, const file_details& details, dwarf& dwarf) { + const auto command_start = s.tellg(); + auto lc = read_pod(s); + if (details._needs_byteswap) { + endian_swap(lc.cmd); + endian_swap(lc.cmdsize); + endian_swap(lc.dylib.name.offset); + endian_swap(lc.dylib.timestamp); + endian_swap(lc.dylib.current_version); // sufficient? + endian_swap(lc.dylib.compatibility_version); // sufficient? + } + + const std::string_view dylib_path = s.read_c_string_view(); + dwarf.register_dylib(std::string(dylib_path)); + + const auto padding = lc.cmdsize - (s.tellg() - command_start); + s.seekg(padding, std::ios::cur); +} + +/**************************************************************************************************/ + +void read_lc_rpath(freader& s, const file_details& details, dwarf& dwarf) { + const auto command_start = s.tellg(); + auto lc = read_pod(s); + if (details._needs_byteswap) { + endian_swap(lc.cmd); + endian_swap(lc.cmdsize); + endian_swap(lc.path.offset); + } + + const std::string_view dylib_path = s.read_c_string_view(); + dwarf.register_rpath(std::string(dylib_path)); + + const auto padding = lc.cmdsize - (s.tellg() - command_start); + s.seekg(padding, std::ios::cur); +} + +/**************************************************************************************************/ + +struct symtab_command { + std::uint32_t cmd; + std::uint32_t cmdsize; + std::uint32_t symoff; + std::uint32_t nsyms; + std::uint32_t stroff; + std::uint32_t strsize; +}; + +/* + The symtab_command contains the offsets and sizes of the link-edit 4.3BSD + "stab" style symbol table information as described in the header files + and . +*/ +void read_stabs(freader& s, + const file_details& details, + dwarf& dwarf, + std::uint32_t symbol_count) { +} + +void read_lc_symtab(freader& s, const file_details& details, dwarf& dwarf) { + auto lc = read_pod(s); + if (details._needs_byteswap) { + endian_swap(lc.cmd); + endian_swap(lc.cmdsize); + endian_swap(lc.symoff); + endian_swap(lc.nsyms); + endian_swap(lc.stroff); + endian_swap(lc.strsize); + } + + temp_seek(s, lc.symoff, [&](){ + read_stabs(s, details, dwarf, lc.nsyms); + }); +} + +/**************************************************************************************************/ + struct load_command { std::uint32_t cmd{0}; std::uint32_t cmdsize{0}; @@ -123,12 +167,19 @@ void read_load_command(freader& s, const file_details& details, dwarf& dwarf) { return command; }); - static constexpr std::uint32_t LC_SEGMENT_64 = 0x19; - switch (command.cmd) { case LC_SEGMENT_64: read_lc_segment_64(s, details, dwarf); break; + case LC_LOAD_DYLIB: + read_lc_load_dylib(s, details, dwarf); + break; + case LC_RPATH: + read_lc_rpath(s, details, dwarf); + break; + case LC_SYMTAB: + read_lc_symtab(s, details, dwarf); + break; default: s.seekg(command.cmdsize, std::ios::cur); } @@ -162,7 +213,7 @@ struct mach_header { dwarf dwarf_from_macho(std::uint32_t ofd_index, freader&& s, file_details&& details, - register_dies_callback&& callback) { + callbacks&& callbacks) { std::size_t load_command_sz{0}; if (details._is_64_bit) { @@ -195,7 +246,7 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, // REVISIT: (fbrereto) I'm not happy that dwarf is an out-arg to read_load_command. // Maybe pass in some kind of lambda that'll get called when a relevant DWARF section // is found? A problem for later... - dwarf dwarf(ofd_index, copy(s), copy(details), std::move(callback)); + dwarf dwarf(ofd_index, copy(s), copy(details), std::move(callbacks)); for (std::size_t i = 0; i < load_command_sz; ++i) { read_load_command(s, details, dwarf); @@ -217,14 +268,25 @@ void read_macho(object_ancestry&& ancestry, callbacks callbacks) { callbacks._do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), - _callback = std::move(callbacks._register_die)]() mutable { - ++globals::instance()._object_file_count; + _callbacks = callbacks]() mutable { + const bool process_die_mode = static_cast(_callbacks._register_die); + const bool derive_dylib_mode = static_cast(_callbacks._derived_dependency); - std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); - dwarf dwarf = dwarf_from_macho(ofd_index, std::move(_s), std::move(_details), - std::move(_callback)); + if (process_die_mode) { + ++globals::instance()._object_file_count; + } - dwarf.process_all_dies(); + std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); + dwarf dwarf = dwarf_from_macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); + + if (process_die_mode) { + dwarf.process_all_dies(); + } else if (derive_dylib_mode) { + dwarf.derive_dependencies(); + } else { + // If we're here, Something Bad has happened. + std::terminate(); + } }); } @@ -236,8 +298,18 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callbac s.seekg(entry._details._offset); + callbacks callbacks { + std::move(callback), + }; + return dwarf_from_macho(ofd_index, std::move(s), copy(entry._details), - std::move(callback)); + std::move(callbacks)); +} + +/**************************************************************************************************/ + +std::vector macho_derive_dylibs(const std::filesystem::path& root_binary) { + return std::vector(); } /**************************************************************************************************/ diff --git a/src/main.cpp b/src/main.cpp index 44acce8..8b8a504 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -70,8 +70,7 @@ std::string exec(const char* cmd) { return result; } -void open_output_file(const std::string& a, const std::string& b) -{ +void open_output_file(const std::string& a, const std::string& b) { std::filesystem::path path(b); if (!a.empty()) { path = a + '.' + b; @@ -82,7 +81,6 @@ void open_output_file(const std::string& a, const std::string& b) /**************************************************************************************************/ void process_orc_config_file(const char* bin_path_string) { - std::filesystem::path pwd(std::filesystem::current_path()); std::filesystem::path bin_path(pwd / bin_path_string); std::filesystem::path config_path; @@ -121,6 +119,7 @@ void process_orc_config_file(const char* bin_path_string) { app_settings._max_violation_count = settings["max_error_count"].value_or(0); app_settings._forward_to_linker = settings["forward_to_linker"].value_or(true); app_settings._standalone_mode = settings["standalone_mode"].value_or(false); + app_settings._dylib_scan_mode = settings["dylib_scan_mode"].value_or(false); app_settings._parallel_processing = settings["parallel_processing"].value_or(true); app_settings._filter_redundant = settings["filter_redundant"].value_or(true); app_settings._print_object_file_list = settings["print_object_file_list"].value_or(false); @@ -151,6 +150,14 @@ void process_orc_config_file(const char* bin_path_string) { } } + if (app_settings._standalone_mode && app_settings._dylib_scan_mode) { + throw std::logic_error("Both standalone and dylib scanning mode are enabled. Pick one."); + } + + if (app_settings._dylib_scan_mode) { + app_settings._forward_to_linker = false; + } + auto read_string_list = [&_settings = settings](const char* name) { std::vector result; if (auto* array = _settings.get_as(name)) { @@ -259,7 +266,7 @@ cmdline_results process_command_line(int argc, char** argv) { }); } - if (settings::instance()._standalone_mode) { + if (settings::instance()._standalone_mode || settings::instance()._dylib_scan_mode) { for (std::size_t i{1}; i < argc; ++i) { result._file_object_list.push_back(argv[i]); } @@ -498,10 +505,9 @@ int main(int argc, char** argv) try { process_orc_config_file(argv[0]); cmdline_results cmdline = process_command_line(argc, argv); - const auto& file_list = cmdline._file_object_list; if (settings::instance()._print_object_file_list) { - for (const auto& input_path : file_list) { + for (const auto& input_path : cmdline._file_object_list) { cout_safe([&](auto& s){ s << input_path.string() << '\n'; }); @@ -512,11 +518,11 @@ int main(int argc, char** argv) try { maybe_forward_to_linker(argc, argv, cmdline); - if (file_list.empty()) { + if (cmdline._file_object_list.empty()) { throw std::runtime_error("ORC could not find files to process"); } - std::vector reports = orc_process(file_list); + std::vector reports = orc_process(std::move(cmdline._file_object_list)); std::vector violations; for (const auto& report : reports) { diff --git a/src/orc.cpp b/src/orc.cpp index 68a2ae6..aa86a96 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -453,8 +453,42 @@ die* enforce_odrv_for_die_list(die* base, std::vector& results) { /**************************************************************************************************/ -std::vector orc_process(const std::vector& file_list) { - // First stage: process all the DIEs +std::vector orc_process(std::vector&& file_list) { + std::mutex macho_derived_dependencies_mutex; + + // First stage: dependency/dylib preprocessing + if (settings::instance()._dylib_scan_mode) { + // dylib scan mode involves a pre-processing step where we parse the file list + // and from those Mach-O files, derive additional files they depend upon. + // Instead, we should be collecting additional files to process in the next + // stage. + for (const auto& input_path : file_list) { + do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = file_list] { + if (!exists(_input_path)) { + throw std::runtime_error("file " + _input_path.string() + " does not exist"); + } + + freader input(_input_path); + callbacks callbacks = { + register_dies_callback(), + do_work, + [&](std::vector&& p){ + std::lock_guard m(_mutex); + _list.insert(_list.end(), + std::move_iterator(p.begin()), + std::move_iterator(p.end())); + } + }; + + parse_file(_input_path.string(), object_ancestry(), input, input.size(), + std::move(callbacks)); + }); + } + } + + work().wait(); + + // Second stage: process all the DIEs for (const auto& input_path : file_list) { do_work([_input_path = input_path] { if (!exists(_input_path)) { @@ -465,6 +499,7 @@ std::vector orc_process(const std::vector& f callbacks callbacks = { register_dies, do_work, + derived_dependency_callback(), }; parse_file(_input_path.string(), object_ancestry(), input, input.size(), @@ -474,7 +509,7 @@ std::vector orc_process(const std::vector& f work().wait(); - // Second stage: review DIEs for ODRVs + // Third stage: review DIEs for ODRVs std::vector result; // We now subdivide the work. We do so by looking at the total number of entries diff --git a/src/parse_file.cpp b/src/parse_file.cpp index d335c28..3d4a9a1 100644 --- a/src/parse_file.cpp +++ b/src/parse_file.cpp @@ -19,10 +19,13 @@ #include #include // close +// mach-o +#include +#include + // application #include "orc/ar.hpp" #include "orc/fat.hpp" -#include "orc/mach_types.hpp" #include "orc/macho.hpp" #include "orc/orc.hpp" From b4b03b5ab4d92c28678551a18ae56827d9e90918 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Wed, 1 May 2024 16:00:24 -0700 Subject: [PATCH 02/25] first pass at dependency scanning. For locally build objects, traverses `LC_SYMTAB` to find the related object files, aka the debug map. --- include/orc/dwarf.hpp | 2 +- src/dwarf.cpp | 8 ++++ src/macho.cpp | 100 +++++++++++++++++++++++++++++------------- src/orc.cpp | 13 ++++-- 4 files changed, 87 insertions(+), 36 deletions(-) diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index 5a0125d..2dc0049 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -30,8 +30,8 @@ struct dwarf { die_pair fetch_one_die(std::size_t debug_info_offset, std::size_t cu_address); void register_dylib(std::string&&); - void register_rpath(std::string&&); + void register_additional_object_files(std::vector&&); void derive_dependencies(); // assumes dylib scan mode diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 0fc4740..be43b66 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -495,6 +495,10 @@ struct dwarf::implementation { _rpaths.emplace_back(std::move(s)); } + void register_additional_object_files(std::vector&& paths) { + _callbacks._derived_dependency(std::move(paths)); + } + void derive_dependencies() { // See https://itwenty.me/posts/01-understanding-rpath/ // `@executable_path` resolves to the path of the directory containing the executable. @@ -1826,6 +1830,10 @@ void dwarf::register_rpath(std::string&& s) { _impl->register_rpath(std::move(s)); } +void dwarf::register_additional_object_files(std::vector&& paths) { + _impl->register_additional_object_files(std::move(paths)); +} + void dwarf::derive_dependencies() { _impl->derive_dependencies(); } diff --git a/src/macho.cpp b/src/macho.cpp index 19e95f9..ebfa084 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -9,6 +9,8 @@ // mach-o #include +#include +#include // tbb #include @@ -111,25 +113,50 @@ void read_lc_rpath(freader& s, const file_details& details, dwarf& dwarf) { } /**************************************************************************************************/ - -struct symtab_command { - std::uint32_t cmd; - std::uint32_t cmdsize; - std::uint32_t symoff; - std::uint32_t nsyms; - std::uint32_t stroff; - std::uint32_t strsize; -}; - /* - The symtab_command contains the offsets and sizes of the link-edit 4.3BSD - "stab" style symbol table information as described in the header files - and . + See https://sourceware.org/gdb/current/onlinedocs/stabs.html/ */ void read_stabs(freader& s, const file_details& details, dwarf& dwarf, - std::uint32_t symbol_count) { + std::uint32_t symbol_count, + std::uint32_t string_offset) { + if (!details._is_64_bit) throw std::runtime_error("Need support for non-64-bit STABs."); + std::vector additional_object_files; + while (symbol_count--) { + auto entry = read_pod(s); + if (entry.n_type != N_OSO) continue; + // TODO: Comparing the modified file time ensures the object file has not changed + // since the application binary was linked. We should compare this time given to + // us against the modified time of the file on-disk. + // const auto modified_time = entry.n_value; + + std::filesystem::path path = temp_seek(s, details._offset + string_offset + entry.n_un.n_strx, [&](){ + return s.read_c_string_view(); + }); + + // Some entries have been observed to contain the `.o` file as a parenthetical to the + // `.a` file that contains it. e.g., `/path/to/bar.a(foo.o)`. For our purposes we'll + // trim off the parenthetical and include the entire `.a` file. Although this could + // introduce extra symbols, they are likely to be included by other STAB entries + // anyhow. + // + // TL;DR: If the filename has an open parentheses in it, remove it and all that + // comes after it. + std::string filename = path.filename().string(); + if (const auto pos = filename.find('('); pos != std::string::npos) { + path = path.parent_path() / filename.substr(0, pos); + } + + additional_object_files.push_back(std::move(path)); + } + // don't think I need these here, as we should sort/make unique the total set + // of additional object files within `orc_process`. Saving until I'm sure. + // + // std::sort(additional_object_files.begin(), additional_object_files.end()); + // auto new_end = std::unique(additional_object_files.begin(), additional_object_files.end()); + // additional_object_files.erase(new_end, additional_object_files.end()); + dwarf.register_additional_object_files(std::move(additional_object_files)); } void read_lc_symtab(freader& s, const file_details& details, dwarf& dwarf) { @@ -143,8 +170,8 @@ void read_lc_symtab(freader& s, const file_details& details, dwarf& dwarf) { endian_swap(lc.strsize); } - temp_seek(s, lc.symoff, [&](){ - read_stabs(s, details, dwarf, lc.nsyms); + temp_seek(s, details._offset + lc.symoff, [&](){ + read_stabs(s, details, dwarf, lc.nsyms, lc.stroff); }); } @@ -157,7 +184,7 @@ struct load_command { /**************************************************************************************************/ -void read_load_command(freader& s, const file_details& details, dwarf& dwarf) { +void read_load_command(freader& s, const file_details& details, dwarf& dwarf, bool derive_dylib_mode) { auto command = temp_seek(s, [&] { auto command = read_pod(s); if (details._needs_byteswap) { @@ -168,20 +195,27 @@ void read_load_command(freader& s, const file_details& details, dwarf& dwarf) { }); switch (command.cmd) { - case LC_SEGMENT_64: + case LC_SEGMENT_64: { read_lc_segment_64(s, details, dwarf); - break; - case LC_LOAD_DYLIB: - read_lc_load_dylib(s, details, dwarf); - break; - case LC_RPATH: - read_lc_rpath(s, details, dwarf); - break; - case LC_SYMTAB: - read_lc_symtab(s, details, dwarf); - break; - default: + } break; + case LC_LOAD_DYLIB: { + if (derive_dylib_mode) { + read_lc_load_dylib(s, details, dwarf); + } + } break; + case LC_RPATH: { + if (derive_dylib_mode) { + read_lc_rpath(s, details, dwarf); + } + } break; + case LC_SYMTAB: { + if (derive_dylib_mode) { + read_lc_symtab(s, details, dwarf); + } + } break; + default: { s.seekg(command.cmdsize, std::ios::cur); + } break; } } @@ -215,6 +249,8 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, file_details&& details, callbacks&& callbacks) { std::size_t load_command_sz{0}; + // TODO: The Mach-O reader really needs its own context/state machine to hold this kind of stuff. + const bool derive_dylib_mode = static_cast(callbacks._derived_dependency); if (details._is_64_bit) { auto header = read_pod(s); @@ -245,11 +281,12 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, // REVISIT: (fbrereto) I'm not happy that dwarf is an out-arg to read_load_command. // Maybe pass in some kind of lambda that'll get called when a relevant DWARF section - // is found? A problem for later... + // is found? Or maybe `read_load_command` should be a member function of `dwarf`? + // A problem for another time... dwarf dwarf(ofd_index, copy(s), copy(details), std::move(callbacks)); for (std::size_t i = 0; i < load_command_sz; ++i) { - read_load_command(s, details, dwarf); + read_load_command(s, details, dwarf, derive_dylib_mode); } return dwarf; @@ -269,6 +306,7 @@ void read_macho(object_ancestry&& ancestry, callbacks._do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _callbacks = callbacks]() mutable { + // TODO: The Mach-O reader really needs its own context/state machine to hold this kind of stuff. const bool process_die_mode = static_cast(_callbacks._register_die); const bool derive_dylib_mode = static_cast(_callbacks._derived_dependency); diff --git a/src/orc.cpp b/src/orc.cpp index aa86a96..247d397 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -283,10 +283,9 @@ void do_work(std::function f) { auto doit = [_f = std::move(f)]() { try { _f(); - } catch (const std::exception& error) { - cerr_safe([&](auto& s) { s << "task error: " << error.what() << '\n'; }); } catch (...) { - cerr_safe([&](auto& s) { s << "task error: unknown\n"; }); + assert(!"The program is ill-structured. Catch exceptions before they hit here."); + std::terminate(); } }; @@ -488,11 +487,17 @@ std::vector orc_process(std::vector&& file_l work().wait(); + // eliminate duplicate object files, if any + std::sort(file_list.begin(), file_list.end()); + auto new_end = std::unique(file_list.begin(), file_list.end()); + file_list.erase(new_end, file_list.end()); + // Second stage: process all the DIEs for (const auto& input_path : file_list) { do_work([_input_path = input_path] { if (!exists(_input_path)) { - throw std::runtime_error("file " + _input_path.string() + " does not exist"); + cerr_safe([&](auto& s) { s << "file " << _input_path.string() << " does not exist\n"; }); + return; } freader input(_input_path); From f6d9f5e97649b59cc93c3be7234541851274aaab Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Wed, 1 May 2024 16:42:20 -0700 Subject: [PATCH 03/25] adding justfile --- justfile | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 justfile diff --git a/justfile b/justfile new file mode 100644 index 0000000..39924d9 --- /dev/null +++ b/justfile @@ -0,0 +1,26 @@ +set shell := ["bash", "-uc"] + +alias g := generate + +# Self-help +default: + @just --list --justfile {{justfile()}} + +build_instruction := if path_exists("build") == "true" { "echo '`build/`' exists" } else { "mkdir build" } + +# make the `build` directory where cmake files will go +mkdir: + @{{build_instruction}} + +# Generate the cmake project (Tracy disabled by default) +generate: mkdir + cd build && cmake .. -GXcode -DTRACY_ENABLE=OFF + +# Erase and rebuild the `build` folder +nuke: + rm -rf build/ + just --justfile {{justfile()}} generate + +# Enable Tracy support +tracy: + cd build && cmake .. -GXcode -DTRACY_ENABLE=ON From d60d48b0305e0db15a5c1ecc6bf85180894d30f9 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Wed, 1 May 2024 17:01:26 -0700 Subject: [PATCH 04/25] justfile tweaks --- justfile | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/justfile b/justfile index 39924d9..c894b81 100644 --- a/justfile +++ b/justfile @@ -1,26 +1,31 @@ -set shell := ["bash", "-uc"] +# For documentation on `just`, see https://github.com/casey/just +# and the online manual https://just.systems/man/en/ +# This set of recipes have only been tested on macOS. -alias g := generate +set shell := ["bash", "-uc"] # Self-help +[private] # Don't print `default` as one of the recipes default: - @just --list --justfile {{justfile()}} - -build_instruction := if path_exists("build") == "true" { "echo '`build/`' exists" } else { "mkdir build" } + @just --list --justfile {{justfile()}} --list-heading $'Usage: `just recipe` where `recipe` is one of:\n' -# make the `build` directory where cmake files will go +# Make `build/` if it does not exist mkdir: - @{{build_instruction}} + #!/usr/bin/env bash + set -euxo pipefail + if [ ! -d build ]; then + mkdir build + fi -# Generate the cmake project (Tracy disabled by default) -generate: mkdir +# Generate the cmake project (Tracy disabled) +gen: mkdir cd build && cmake .. -GXcode -DTRACY_ENABLE=OFF -# Erase and rebuild the `build` folder -nuke: +# Erase and rebuild `build/` folder +[confirm("Are you sure you want to delete `build/`? (y/N)")] +nuke: && mkdir gen rm -rf build/ - just --justfile {{justfile()}} generate -# Enable Tracy support -tracy: +# Generate the cmake project (Tracy enabled) +tracy: mkdir cd build && cmake .. -GXcode -DTRACY_ENABLE=ON From 3de9c004f4c6b8a99ccf6e84a4af74e089878c6e Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 10:29:15 -0700 Subject: [PATCH 05/25] refactoring to keep the right work in the right sources --- _orc-config | 2 +- include/orc/dwarf.hpp | 6 - include/orc/macho.hpp | 13 ++ src/dwarf.cpp | 80 ------------ src/macho.cpp | 287 +++++++++++++++++++++++++----------------- src/orc.cpp | 30 ++--- 6 files changed, 195 insertions(+), 223 deletions(-) diff --git a/_orc-config b/_orc-config index d4b122b..379ed50 100644 --- a/_orc-config +++ b/_orc-config @@ -55,7 +55,7 @@ standalone_mode = false # # The default value is `false`. -dylib_scan_mode = false +dylib_scan_mode = true # If defined, ORC will log output to the specified file. (Normal stream output # is unaffected by the `output_file`.) diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index 2dc0049..7eea2c4 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -29,12 +29,6 @@ struct dwarf { die_pair fetch_one_die(std::size_t debug_info_offset, std::size_t cu_address); - void register_dylib(std::string&&); - void register_rpath(std::string&&); - void register_additional_object_files(std::vector&&); - - void derive_dependencies(); // assumes dylib scan mode - private: struct implementation; std::unique_ptr _impl{nullptr, diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index 19a1943..ad70883 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -14,6 +14,15 @@ /**************************************************************************************************/ +template +void move_append(C& dst, C& src) { + dst.insert(dst.end(), + std::move_iterator(src.begin()), + std::move_iterator(src.end())); +} + +/**************************************************************************************************/ + void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, @@ -25,3 +34,7 @@ void read_macho(object_ancestry&& ancestry, struct dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callback); /**************************************************************************************************/ + +std::vector macho_derive_dylibs(const std::filesystem::path& root_binary); + +/**************************************************************************************************/ diff --git a/src/dwarf.cpp b/src/dwarf.cpp index be43b66..63f1b16 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -393,36 +393,6 @@ enum class process_mode { /**************************************************************************************************/ -std::filesystem::path resolve_dylib(std::string raw_path, - const std::filesystem::path& executable_path, - const std::filesystem::path& loader_path, - const std::vector& rpaths) { - constexpr std::string_view executable_path_k = "@executable_path"; - constexpr std::string_view loader_path_k = "@loader_path"; - constexpr std::string_view rpath_k = "@rpath"; - - if (raw_path.starts_with(executable_path_k)) { - raw_path.replace(0, executable_path_k.size(), executable_path.string()); - } else if (raw_path.starts_with(loader_path_k)) { - raw_path.replace(0, loader_path_k.size(), loader_path.string()); - } else if (raw_path.starts_with(rpath_k)) { - // search rpaths until the desired dylib is actually found. - for (const auto& rpath : rpaths) { - std::string tmp = raw_path; - tmp.replace(0, rpath_k.size(), rpath); - std::filesystem::path candidate = resolve_dylib(tmp, executable_path, loader_path, rpaths); - if (exists(candidate)) { - return candidate; - } - } - throw std::runtime_error("Could not find dependent library: " + raw_path); - } - - return raw_path; -} - -/**************************************************************************************************/ - } // namespace /**************************************************************************************************/ @@ -487,46 +457,12 @@ struct dwarf::implementation { die_pair abbreviation_to_die(std::size_t die_address, process_mode mode); - void register_dylib(std::string&& s) { - _dylibs.emplace_back(std::move(s)); - } - - void register_rpath(std::string&& s) { - _rpaths.emplace_back(std::move(s)); - } - - void register_additional_object_files(std::vector&& paths) { - _callbacks._derived_dependency(std::move(paths)); - } - - void derive_dependencies() { - // See https://itwenty.me/posts/01-understanding-rpath/ - // `@executable_path` resolves to the path of the directory containing the executable. - // `@loader_path` resolves to the path of the client doing the loading. - // For executables, `@loader_path` and `@executable_path` mean the same thing. - // TODO: (fosterbrereton) We're going to have to nest this search, aren't we? - // If so, that means we'll need to track the originating file and use it as - // `executable_path`, and then `loader_path` will follow wherever the nesting goes. - - std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); - std::filesystem::path loader_path = executable_path; - std::vector resolved_dylibs; - std::transform(_dylibs.begin(), _dylibs.end(), std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib){ - return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); - }); - - // Send these back to the main engine for ODR scanning processing. - _callbacks._derived_dependency(std::move(resolved_dylibs)); - } - freader _s; file_details _details; callbacks _callbacks; std::vector _abbreviations; std::vector _path; std::vector _decl_files; - std::vector _dylibs; // unresolved - std::vector _rpaths; // unresolved std::unordered_map _type_cache; std::unordered_map _debug_str_cache; cu_header _cu_header; @@ -1822,20 +1758,4 @@ die_pair dwarf::fetch_one_die(std::size_t debug_info_offset, std::size_t cu_die_ return _impl->fetch_one_die(debug_info_offset, cu_die_address); } -void dwarf::register_dylib(std::string&& s) { - _impl->register_dylib(std::move(s)); -} - -void dwarf::register_rpath(std::string&& s) { - _impl->register_rpath(std::move(s)); -} - -void dwarf::register_additional_object_files(std::vector&& paths) { - _impl->register_additional_object_files(std::move(paths)); -} - -void dwarf::derive_dependencies() { - _impl->derive_dependencies(); -} - /**************************************************************************************************/ diff --git a/src/macho.cpp b/src/macho.cpp index ebfa084..49ee57c 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -15,6 +15,9 @@ // tbb #include +// stlab +#include + // application #include "orc/dwarf.hpp" #include "orc/object_file_registry.hpp" @@ -27,9 +30,53 @@ namespace { /**************************************************************************************************/ -void read_lc_segment_64_section(freader& s, const file_details& details, dwarf& dwarf) { - auto section = read_pod(s); - if (details._needs_byteswap) { +struct macho_reader { + macho_reader(std::uint32_t ofd_index, + freader&& s, + file_details&& details, + callbacks&& callbacks) : + _process_die_mode(static_cast(callbacks._register_die)), + _derive_dylib_mode(static_cast(callbacks._derived_dependency)), + _ofd_index(ofd_index), + _s(std::move(s)), + _details(std::move(details)), + _derived_dependency(_derive_dylib_mode ? callbacks._derived_dependency : derived_dependency_callback()), + _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks)) { + populate_dwarf(); + } + + struct dwarf& dwarf() & { return _dwarf; } + struct dwarf&& dwarf() && { return std::move(_dwarf); } + + void derive_dependencies(); + + const bool _process_die_mode{false}; + const bool _derive_dylib_mode{false}; + +private: + void populate_dwarf(); + void read_load_command(); + void read_lc_segment_64(); + void read_lc_segment_64_section(); + void read_lc_load_dylib(); + void read_lc_rpath(); + void read_lc_symtab(); + void read_stabs(std::uint32_t symbol_count, std::uint32_t string_offset); + + const std::uint32_t _ofd_index{0}; + freader _s; + const file_details _details; + derived_dependency_callback _derived_dependency; + std::vector _unresolved_dylibs; + std::vector _rpaths; + struct dwarf _dwarf; // must be last +}; + +/**************************************************************************************************/ + +void macho_reader::read_lc_segment_64_section() { + auto section = read_pod(_s); + if (_details._needs_byteswap) { // endian_swap(section.sectname[16]); // endian_swap(section.segname[16]); endian_swap(section.addr); @@ -46,15 +93,15 @@ void read_lc_segment_64_section(freader& s, const file_details& details, dwarf& if (rstrip(section.segname) != "__DWARF") return; - dwarf.register_section(rstrip(section.sectname), details._offset + section.offset, - section.size); + _dwarf.register_section(rstrip(section.sectname), _details._offset + section.offset, + section.size); } /**************************************************************************************************/ -void read_lc_segment_64(freader& s, const file_details& details, dwarf& dwarf) { - auto lc = read_pod(s); - if (details._needs_byteswap) { +void macho_reader::read_lc_segment_64() { + auto lc = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(lc.cmd); endian_swap(lc.cmdsize); // endian_swap(lc.segname); @@ -69,16 +116,16 @@ void read_lc_segment_64(freader& s, const file_details& details, dwarf& dwarf) { } for (std::size_t i = 0; i < lc.nsects; ++i) { - read_lc_segment_64_section(s, details, dwarf); + read_lc_segment_64_section(); } } /**************************************************************************************************/ -void read_lc_load_dylib(freader& s, const file_details& details, dwarf& dwarf) { - const auto command_start = s.tellg(); - auto lc = read_pod(s); - if (details._needs_byteswap) { +void macho_reader::read_lc_load_dylib() { + const auto command_start = _s.tellg(); + auto lc = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(lc.cmd); endian_swap(lc.cmdsize); endian_swap(lc.dylib.name.offset); @@ -87,52 +134,46 @@ void read_lc_load_dylib(freader& s, const file_details& details, dwarf& dwarf) { endian_swap(lc.dylib.compatibility_version); // sufficient? } - const std::string_view dylib_path = s.read_c_string_view(); - dwarf.register_dylib(std::string(dylib_path)); + _unresolved_dylibs.emplace_back(_s.read_c_string_view()); - const auto padding = lc.cmdsize - (s.tellg() - command_start); - s.seekg(padding, std::ios::cur); + const auto padding = lc.cmdsize - (_s.tellg() - command_start); + _s.seekg(padding, std::ios::cur); } /**************************************************************************************************/ -void read_lc_rpath(freader& s, const file_details& details, dwarf& dwarf) { - const auto command_start = s.tellg(); - auto lc = read_pod(s); - if (details._needs_byteswap) { +void macho_reader::read_lc_rpath() { + const auto command_start = _s.tellg(); + auto lc = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(lc.cmd); endian_swap(lc.cmdsize); endian_swap(lc.path.offset); } - const std::string_view dylib_path = s.read_c_string_view(); - dwarf.register_rpath(std::string(dylib_path)); + _rpaths.emplace_back(_s.read_c_string_view()); - const auto padding = lc.cmdsize - (s.tellg() - command_start); - s.seekg(padding, std::ios::cur); + const auto padding = lc.cmdsize - (_s.tellg() - command_start); + _s.seekg(padding, std::ios::cur); } /**************************************************************************************************/ /* See https://sourceware.org/gdb/current/onlinedocs/stabs.html/ */ -void read_stabs(freader& s, - const file_details& details, - dwarf& dwarf, - std::uint32_t symbol_count, - std::uint32_t string_offset) { - if (!details._is_64_bit) throw std::runtime_error("Need support for non-64-bit STABs."); +void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_offset) { + if (!_details._is_64_bit) throw std::runtime_error("Need support for non-64-bit STABs."); std::vector additional_object_files; while (symbol_count--) { - auto entry = read_pod(s); + auto entry = read_pod(_s); if (entry.n_type != N_OSO) continue; // TODO: Comparing the modified file time ensures the object file has not changed // since the application binary was linked. We should compare this time given to // us against the modified time of the file on-disk. // const auto modified_time = entry.n_value; - std::filesystem::path path = temp_seek(s, details._offset + string_offset + entry.n_un.n_strx, [&](){ - return s.read_c_string_view(); + std::filesystem::path path = temp_seek(_s, _details._offset + string_offset + entry.n_un.n_strx, [&](){ + return _s.read_c_string_view(); }); // Some entries have been observed to contain the `.o` file as a parenthetical to the @@ -156,12 +197,12 @@ void read_stabs(freader& s, // std::sort(additional_object_files.begin(), additional_object_files.end()); // auto new_end = std::unique(additional_object_files.begin(), additional_object_files.end()); // additional_object_files.erase(new_end, additional_object_files.end()); - dwarf.register_additional_object_files(std::move(additional_object_files)); + _derived_dependency(std::move(additional_object_files)); } -void read_lc_symtab(freader& s, const file_details& details, dwarf& dwarf) { - auto lc = read_pod(s); - if (details._needs_byteswap) { +void macho_reader::read_lc_symtab() { + auto lc = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(lc.cmd); endian_swap(lc.cmdsize); endian_swap(lc.symoff); @@ -170,24 +211,17 @@ void read_lc_symtab(freader& s, const file_details& details, dwarf& dwarf) { endian_swap(lc.strsize); } - temp_seek(s, details._offset + lc.symoff, [&](){ - read_stabs(s, details, dwarf, lc.nsyms, lc.stroff); + temp_seek(_s, _details._offset + lc.symoff, [&](){ + read_stabs(lc.nsyms, lc.stroff); }); } /**************************************************************************************************/ -struct load_command { - std::uint32_t cmd{0}; - std::uint32_t cmdsize{0}; -}; - -/**************************************************************************************************/ - -void read_load_command(freader& s, const file_details& details, dwarf& dwarf, bool derive_dylib_mode) { - auto command = temp_seek(s, [&] { - auto command = read_pod(s); - if (details._needs_byteswap) { +void macho_reader::read_load_command() { + auto command = temp_seek(_s, [&] { + auto command = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(command.cmd); endian_swap(command.cmdsize); } @@ -196,65 +230,89 @@ void read_load_command(freader& s, const file_details& details, dwarf& dwarf, bo switch (command.cmd) { case LC_SEGMENT_64: { - read_lc_segment_64(s, details, dwarf); + read_lc_segment_64(); } break; case LC_LOAD_DYLIB: { - if (derive_dylib_mode) { - read_lc_load_dylib(s, details, dwarf); + if (_derive_dylib_mode) { + read_lc_load_dylib(); } } break; case LC_RPATH: { - if (derive_dylib_mode) { - read_lc_rpath(s, details, dwarf); + if (_derive_dylib_mode) { + read_lc_rpath(); } } break; case LC_SYMTAB: { - if (derive_dylib_mode) { - read_lc_symtab(s, details, dwarf); + if (_derive_dylib_mode) { + read_lc_symtab(); } } break; default: { - s.seekg(command.cmdsize, std::ios::cur); + _s.seekg(command.cmdsize, std::ios::cur); } break; } } /**************************************************************************************************/ -struct mach_header_64 { - std::uint32_t magic{0}; - cpu_type_t cputype{0}; - cpu_subtype_t cpusubtype{0}; - std::uint32_t filetype{0}; - std::uint32_t ncmds{0}; - std::uint32_t sizeofcmds{0}; - std::uint32_t flags{0}; - std::uint32_t reserved{0}; -}; +std::filesystem::path resolve_dylib(std::string raw_path, + const std::filesystem::path& executable_path, + const std::filesystem::path& loader_path, + const std::vector& rpaths) { + constexpr std::string_view executable_path_k = "@executable_path"; + constexpr std::string_view loader_path_k = "@loader_path"; + constexpr std::string_view rpath_k = "@rpath"; + + if (raw_path.starts_with(executable_path_k)) { + raw_path.replace(0, executable_path_k.size(), executable_path.string()); + } else if (raw_path.starts_with(loader_path_k)) { + raw_path.replace(0, loader_path_k.size(), loader_path.string()); + } else if (raw_path.starts_with(rpath_k)) { + // search rpaths until the desired dylib is actually found. + for (const auto& rpath : rpaths) { + std::string tmp = raw_path; + tmp.replace(0, rpath_k.size(), rpath); + std::filesystem::path candidate = resolve_dylib(tmp, executable_path, loader_path, rpaths); + if (exists(candidate)) { + return candidate; + } + } + throw std::runtime_error("Could not find dependent library: " + raw_path); + } -struct mach_header { - std::uint32_t magic; - cpu_type_t cputype; - cpu_subtype_t cpusubtype; - std::uint32_t filetype; - std::uint32_t ncmds; - std::uint32_t sizeofcmds; - std::uint32_t flags; -}; + return raw_path; +} /**************************************************************************************************/ -dwarf dwarf_from_macho(std::uint32_t ofd_index, - freader&& s, - file_details&& details, - callbacks&& callbacks) { +void macho_reader::derive_dependencies() { + // See https://itwenty.me/posts/01-understanding-rpath/ + // `@executable_path` resolves to the path of the directory containing the executable. + // `@loader_path` resolves to the path of the client doing the loading. + // For executables, `@loader_path` and `@executable_path` mean the same thing. + // TODO: (fosterbrereton) We're going to have to nest this search, aren't we? + // If so, that means we'll need to track the originating file and use it as + // `executable_path`, and then `loader_path` will follow wherever the nesting goes. + + std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); + std::filesystem::path loader_path = executable_path; + std::vector resolved_dylibs; + std::transform(_unresolved_dylibs.begin(), _unresolved_dylibs.end(), std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib){ + return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); + }); + + // Send these back to the main engine for ODR scanning processing. + _derived_dependency(std::move(resolved_dylibs)); +} + +/**************************************************************************************************/ + +void macho_reader::populate_dwarf() { std::size_t load_command_sz{0}; - // TODO: The Mach-O reader really needs its own context/state machine to hold this kind of stuff. - const bool derive_dylib_mode = static_cast(callbacks._derived_dependency); - if (details._is_64_bit) { - auto header = read_pod(s); - if (details._needs_byteswap) { + if (_details._is_64_bit) { + auto header = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(header.magic); endian_swap(header.cputype); endian_swap(header.cpusubtype); @@ -266,8 +324,8 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, } load_command_sz = header.ncmds; } else { - auto header = read_pod(s); - if (details._needs_byteswap) { + auto header = read_pod(_s); + if (_details._needs_byteswap) { endian_swap(header.magic); endian_swap(header.cputype); endian_swap(header.cpusubtype); @@ -279,17 +337,9 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, load_command_sz = header.ncmds; } - // REVISIT: (fbrereto) I'm not happy that dwarf is an out-arg to read_load_command. - // Maybe pass in some kind of lambda that'll get called when a relevant DWARF section - // is found? Or maybe `read_load_command` should be a member function of `dwarf`? - // A problem for another time... - dwarf dwarf(ofd_index, copy(s), copy(details), std::move(callbacks)); - for (std::size_t i = 0; i < load_command_sz; ++i) { - read_load_command(s, details, dwarf, derive_dylib_mode); + read_load_command(); } - - return dwarf; } /**************************************************************************************************/ @@ -306,21 +356,14 @@ void read_macho(object_ancestry&& ancestry, callbacks._do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _callbacks = callbacks]() mutable { - // TODO: The Mach-O reader really needs its own context/state machine to hold this kind of stuff. - const bool process_die_mode = static_cast(_callbacks._register_die); - const bool derive_dylib_mode = static_cast(_callbacks._derived_dependency); - - if (process_die_mode) { - ++globals::instance()._object_file_count; - } - std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); - dwarf dwarf = dwarf_from_macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); + macho_reader macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); - if (process_die_mode) { - dwarf.process_all_dies(); - } else if (derive_dylib_mode) { - dwarf.derive_dependencies(); + if (macho._process_die_mode) { + ++globals::instance()._object_file_count; + macho.dwarf().process_all_dies(); + } else if (macho._derive_dylib_mode) { + macho.derive_dependencies(); } else { // If we're here, Something Bad has happened. std::terminate(); @@ -340,14 +383,30 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callbac std::move(callback), }; - return dwarf_from_macho(ofd_index, std::move(s), copy(entry._details), - std::move(callbacks)); + return macho_reader(ofd_index, std::move(s), copy(entry._details), std::move(callbacks)).dwarf(); } /**************************************************************************************************/ std::vector macho_derive_dylibs(const std::filesystem::path& root_binary) { - return std::vector(); + std::vector result; + + if (!exists(root_binary)) { + throw std::runtime_error("file " + root_binary.string() + " does not exist"); + } + + freader input(root_binary); + callbacks callbacks = { + register_dies_callback(), + stlab::immediate_executor, // don't subdivide or reschedule sub-work during this scan. + [&_result = result](std::vector&& p){ + move_append(_result, p); + } + }; + + parse_file(root_binary.string(), object_ancestry(), input, input.size(), std::move(callbacks)); + + return result; } /**************************************************************************************************/ diff --git a/src/orc.cpp b/src/orc.cpp index 247d397..203f0c6 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -454,39 +454,25 @@ die* enforce_odrv_for_die_list(die* base, std::vector& results) { std::vector orc_process(std::vector&& file_list) { std::mutex macho_derived_dependencies_mutex; + std::vector macho_derived_dependencies; - // First stage: dependency/dylib preprocessing + // First stage: (optional) dependency/dylib preprocessing if (settings::instance()._dylib_scan_mode) { // dylib scan mode involves a pre-processing step where we parse the file list // and from those Mach-O files, derive additional files they depend upon. - // Instead, we should be collecting additional files to process in the next - // stage. for (const auto& input_path : file_list) { - do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = file_list] { - if (!exists(_input_path)) { - throw std::runtime_error("file " + _input_path.string() + " does not exist"); - } - - freader input(_input_path); - callbacks callbacks = { - register_dies_callback(), - do_work, - [&](std::vector&& p){ - std::lock_guard m(_mutex); - _list.insert(_list.end(), - std::move_iterator(p.begin()), - std::move_iterator(p.end())); - } - }; - - parse_file(_input_path.string(), object_ancestry(), input, input.size(), - std::move(callbacks)); + do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = macho_derived_dependencies] { + auto dylibs = macho_derive_dylibs(_input_path); + std::lock_guard m(_mutex); + move_append(_list, dylibs); }); } } work().wait(); + move_append(file_list, macho_derived_dependencies); + // eliminate duplicate object files, if any std::sort(file_list.begin(), file_list.end()); auto new_end = std::unique(file_list.begin(), file_list.end()); From cf3dd79fca2bfce5d8c20a6d6d70d5015584dec5 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 10:29:37 -0700 Subject: [PATCH 06/25] checked in a typo --- _orc-config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_orc-config b/_orc-config index 379ed50..d4b122b 100644 --- a/_orc-config +++ b/_orc-config @@ -55,7 +55,7 @@ standalone_mode = false # # The default value is `false`. -dylib_scan_mode = true +dylib_scan_mode = false # If defined, ORC will log output to the specified file. (Normal stream output # is unaffected by the `output_file`.) From 6d0fbad2ebf3a773069ce3ce618aee1e727a86e6 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 10:53:16 -0700 Subject: [PATCH 07/25] cleanup --- include/orc/dwarf.hpp | 2 +- src/dwarf.cpp | 32 +++++++------------------------- src/macho.cpp | 28 ++++++++++++++++++++++------ src/orc.cpp | 1 + 4 files changed, 31 insertions(+), 32 deletions(-) diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index 7eea2c4..694b2fc 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -21,7 +21,7 @@ struct dwarf { dwarf(std::uint32_t ofd_index, freader&& s, file_details&& details, - callbacks&& callbacks); + register_dies_callback&& callback); void register_section(std::string name, std::size_t offset, std::size_t size); diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 63f1b16..7af00da 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -401,8 +401,8 @@ struct dwarf::implementation { implementation(std::uint32_t ofd_index, freader&& s, file_details&& details, - callbacks&& callbacks) - : _s(std::move(s)), _details(std::move(details)), _callbacks(std::move(callbacks)), + register_dies_callback&& callback) + : _s(std::move(s)), _details(std::move(details)), _register_dies(std::move(callback)), _ofd_index(ofd_index) {} void register_section(const std::string& name, std::size_t offset, std::size_t size); @@ -459,7 +459,7 @@ struct dwarf::implementation { freader _s; file_details _details; - callbacks _callbacks; + register_dies_callback _register_dies; std::vector _abbreviations; std::vector _path; std::vector _decl_files; @@ -1538,25 +1538,7 @@ void dwarf::implementation::report_die_processing_failure(std::size_t die_absolu } /**************************************************************************************************/ -/* - Not sure where to put this, so it's going here. This is specifically in relation to the - dylib scanning mode, where we're looking at a final linked artifact that enumerates the - dylibs it depends upon. - - Debug builds on macOS do not embed symbol information into the binary by default. - Rather, there are "debug maps" that link from the artifact to the `.o` files used to - make it where the symbol information resides. At the time the application is debugged, - the debug maps are used to derive the symbols of the application by pulling them from - the relevant object files. - - Because of this funky artifact->debug map->object file relationship, ORC must also - support debug maps in order to derive and scan the symbols present in a linked artifact. - This also means the final linked binary is not sufficient for a scan; you _also_ need - its associated object files present, _and_ in the location specified by the debug map. - - Apple's "Lazy" DWARF Scheme: https://wiki.dwarfstd.org/Apple%27s_%22Lazy%22_DWARF_Scheme.md - See: https://stackoverflow.com/a/12827463/153535 -*/ + void dwarf::implementation::process_all_dies() { if (!_ready && !register_sections_done()) return; assert(_ready); @@ -1662,7 +1644,7 @@ void dwarf::implementation::process_all_dies() { dies.shrink_to_fit(); - _callbacks._register_die(std::move(dies)); + _register_dies(std::move(dies)); } /**************************************************************************************************/ @@ -1744,8 +1726,8 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t debug_info_offset, dwarf::dwarf(std::uint32_t ofd_index, freader&& s, file_details&& details, - callbacks&& callbacks) - : _impl(new implementation(ofd_index, std::move(s), std::move(details), std::move(callbacks)), + register_dies_callback&& callback) + : _impl(new implementation(ofd_index, std::move(s), std::move(details), std::move(callback)), [](auto x) { delete x; }) {} void dwarf::register_section(std::string name, std::size_t offset, std::size_t size) { diff --git a/src/macho.cpp b/src/macho.cpp index 49ee57c..574a9bd 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -21,6 +21,7 @@ // application #include "orc/dwarf.hpp" #include "orc/object_file_registry.hpp" +#include "orc/orc.hpp" // for cerr_safe #include "orc/settings.hpp" #include "orc/str.hpp" @@ -40,8 +41,8 @@ struct macho_reader { _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), - _derived_dependency(_derive_dylib_mode ? callbacks._derived_dependency : derived_dependency_callback()), - _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks)) { + _derived_dependency(std::move(callbacks._derived_dependency)), + _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { populate_dwarf(); } @@ -159,7 +160,22 @@ void macho_reader::read_lc_rpath() { /**************************************************************************************************/ /* - See https://sourceware.org/gdb/current/onlinedocs/stabs.html/ + This is specifically in relation to the dylib scanning mode, where we're looking at a final + linked artifact that enumerates the dylibs it depends upon. + + Debug builds on macOS do not embed symbol information into the binary by default. Rather, there + are "debug maps" that link from the artifact to the `.o` files used to make it where the symbol + information resides. At the time the application is debugged, the debug maps are used to derive + the symbols of the application by pulling them from the relevant object files. + + Because of this funky artifact->debug map->object file relationship, ORC must also support debug + maps in order to derive and scan the symbols present in a linked artifact. This also means the + final linked binary is not sufficient for a scan; you _also_ need its associated object files + present, _and_ in the location specified by the debug map. + + Apple's "Lazy" DWARF Scheme: https://wiki.dwarfstd.org/Apple%27s_%22Lazy%22_DWARF_Scheme.md + See: https://stackoverflow.com/a/12827463/153535 + See: https://sourceware.org/gdb/current/onlinedocs/stabs.html/ */ void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_offset) { if (!_details._is_64_bit) throw std::runtime_error("Need support for non-64-bit STABs."); @@ -389,13 +405,13 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callbac /**************************************************************************************************/ std::vector macho_derive_dylibs(const std::filesystem::path& root_binary) { - std::vector result; - if (!exists(root_binary)) { - throw std::runtime_error("file " + root_binary.string() + " does not exist"); + cerr_safe([&](auto& s) { s << "file " << root_binary.string() << " does not exist\n"; }); + return std::vector(); } freader input(root_binary); + std::vector result; callbacks callbacks = { register_dies_callback(), stlab::immediate_executor, // don't subdivide or reschedule sub-work during this scan. diff --git a/src/orc.cpp b/src/orc.cpp index 203f0c6..b07f7dd 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -463,6 +463,7 @@ std::vector orc_process(std::vector&& file_l for (const auto& input_path : file_list) { do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = macho_derived_dependencies] { auto dylibs = macho_derive_dylibs(_input_path); + if (dylibs.empty()) return; std::lock_guard m(_mutex); move_append(_list, dylibs); }); From 0e8dee7273a918e3b0dff184a70493d4bcd29f39 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 10:53:51 -0700 Subject: [PATCH 08/25] cleanup --- include/orc/dwarf.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index 694b2fc..bfea404 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -25,7 +25,7 @@ struct dwarf { void register_section(std::string name, std::size_t offset, std::size_t size); - void process_all_dies(); // assumes register die mode + void process_all_dies(); die_pair fetch_one_die(std::size_t debug_info_offset, std::size_t cu_address); From 5ccc0a2a0a3f1edb490d9b8b4819ff3eb15e72d1 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 11:08:11 -0700 Subject: [PATCH 09/25] cleanup --- src/macho.cpp | 16 ++++++++++++++-- src/orc.cpp | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/macho.cpp b/src/macho.cpp index 574a9bd..5dafd2b 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -30,7 +30,13 @@ namespace { /**************************************************************************************************/ - +/* + This structure holds state while the Mach-O files are being read. Its goal is to 1) populate its + dwarf field (for die processing within `dwarf.cpp`, or 2) derive dylib dependencies enumerated + in the Mach-O segments of the file being read. The "switch" for which mode the reader is in is + based on the callbacks it is given. Since die scanning and dylib scanning are mutually + exclusive, the callbacks provided determine which path the `macho_reader` should take. +*/ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, @@ -43,6 +49,10 @@ struct macho_reader { _details(std::move(details)), _derived_dependency(std::move(callbacks._derived_dependency)), _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { + if (_process_die_mode ^ _derive_dylib_mode) { + cerr_safe([&](auto& s) { s << "Exactly one of die or dylib scanning is allowed.\n"; }); + std::terminate(); + } populate_dwarf(); } @@ -246,7 +256,9 @@ void macho_reader::read_load_command() { switch (command.cmd) { case LC_SEGMENT_64: { - read_lc_segment_64(); + if (_process_die_mode) { + read_lc_segment_64(); + } } break; case LC_LOAD_DYLIB: { if (_derive_dylib_mode) { diff --git a/src/orc.cpp b/src/orc.cpp index b07f7dd..d915bcf 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -459,7 +459,7 @@ std::vector orc_process(std::vector&& file_l // First stage: (optional) dependency/dylib preprocessing if (settings::instance()._dylib_scan_mode) { // dylib scan mode involves a pre-processing step where we parse the file list - // and from those Mach-O files, derive additional files they depend upon. + // and discover any dylibs those Mach-O files depend upon. for (const auto& input_path : file_list) { do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = macho_derived_dependencies] { auto dylibs = macho_derive_dylibs(_input_path); From c516b87f430f307d35c7502bd03a83e38a9089c1 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 11:13:28 -0700 Subject: [PATCH 10/25] cleanup --- src/dwarf.cpp | 12 ++++--- src/macho.cpp | 63 +++++++++++++++++----------------- src/main.cpp | 93 +++++++++++++++++++++------------------------------ 3 files changed, 78 insertions(+), 90 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 7af00da..6f5c9ed 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -1200,7 +1200,8 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, const auto handle_passover = [&]() { if (fatal_attribute(attr._name)) { - throw std::runtime_error(std::string("Passing over an essential attribute (") + to_string(attr._name) + ")"); + throw std::runtime_error(std::string("Passing over an essential attribute (") + + to_string(attr._name) + ")"); } result.passover(); auto size = form_length(attr._form, _s); @@ -1519,9 +1520,11 @@ bool dwarf::implementation::skip_die(die& d, const attribute_sequence& attribute /**************************************************************************************************/ -void dwarf::implementation::report_die_processing_failure(std::size_t die_absolute_offset, std::string&& error) { +void dwarf::implementation::report_die_processing_failure(std::size_t die_absolute_offset, + std::string&& error) { if (log_level_at_least(settings::log_level::warning)) { - const auto debug_info_offset = static_cast(die_absolute_offset - _debug_info._offset); + const auto debug_info_offset = + static_cast(die_absolute_offset - _debug_info._offset); cerr_safe([&](auto& s) { s << "warning: failed to process die\n" @@ -1567,7 +1570,8 @@ void dwarf::implementation::process_all_dies() { attribute_sequence attributes; try { - std::tie(die, attributes) = abbreviation_to_die(die_absolute_offset, process_mode::complete); + std::tie(die, attributes) = + abbreviation_to_die(die_absolute_offset, process_mode::complete); } catch (const std::exception& error) { // `report_die_processing_failure` will rethrow report_die_processing_failure(die_absolute_offset, error.what()); diff --git a/src/macho.cpp b/src/macho.cpp index 5dafd2b..d7e45d5 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -41,14 +41,12 @@ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, file_details&& details, - callbacks&& callbacks) : - _process_die_mode(static_cast(callbacks._register_die)), - _derive_dylib_mode(static_cast(callbacks._derived_dependency)), - _ofd_index(ofd_index), - _s(std::move(s)), - _details(std::move(details)), - _derived_dependency(std::move(callbacks._derived_dependency)), - _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { + callbacks&& callbacks) + : _process_die_mode(static_cast(callbacks._register_die)), + _derive_dylib_mode(static_cast(callbacks._derived_dependency)), + _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), + _derived_dependency(std::move(callbacks._derived_dependency)), + _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { if (_process_die_mode ^ _derive_dylib_mode) { cerr_safe([&](auto& s) { s << "Exactly one of die or dylib scanning is allowed.\n"; }); std::terminate(); @@ -56,8 +54,12 @@ struct macho_reader { populate_dwarf(); } - struct dwarf& dwarf() & { return _dwarf; } - struct dwarf&& dwarf() && { return std::move(_dwarf); } + struct dwarf& dwarf() & { + return _dwarf; + } + struct dwarf&& dwarf() && { + return std::move(_dwarf); + } void derive_dependencies(); @@ -141,7 +143,7 @@ void macho_reader::read_lc_load_dylib() { endian_swap(lc.cmdsize); endian_swap(lc.dylib.name.offset); endian_swap(lc.dylib.timestamp); - endian_swap(lc.dylib.current_version); // sufficient? + endian_swap(lc.dylib.current_version); // sufficient? endian_swap(lc.dylib.compatibility_version); // sufficient? } @@ -198,9 +200,9 @@ void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_o // us against the modified time of the file on-disk. // const auto modified_time = entry.n_value; - std::filesystem::path path = temp_seek(_s, _details._offset + string_offset + entry.n_un.n_strx, [&](){ - return _s.read_c_string_view(); - }); + std::filesystem::path path = + temp_seek(_s, _details._offset + string_offset + entry.n_un.n_strx, + [&]() { return _s.read_c_string_view(); }); // Some entries have been observed to contain the `.o` file as a parenthetical to the // `.a` file that contains it. e.g., `/path/to/bar.a(foo.o)`. For our purposes we'll @@ -237,9 +239,7 @@ void macho_reader::read_lc_symtab() { endian_swap(lc.strsize); } - temp_seek(_s, _details._offset + lc.symoff, [&](){ - read_stabs(lc.nsyms, lc.stroff); - }); + temp_seek(_s, _details._offset + lc.symoff, [&]() { read_stabs(lc.nsyms, lc.stroff); }); } /**************************************************************************************************/ @@ -300,7 +300,8 @@ std::filesystem::path resolve_dylib(std::string raw_path, for (const auto& rpath : rpaths) { std::string tmp = raw_path; tmp.replace(0, rpath_k.size(), rpath); - std::filesystem::path candidate = resolve_dylib(tmp, executable_path, loader_path, rpaths); + std::filesystem::path candidate = + resolve_dylib(tmp, executable_path, loader_path, rpaths); if (exists(candidate)) { return candidate; } @@ -322,12 +323,14 @@ void macho_reader::derive_dependencies() { // If so, that means we'll need to track the originating file and use it as // `executable_path`, and then `loader_path` will follow wherever the nesting goes. - std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); + std::filesystem::path executable_path = + object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); std::filesystem::path loader_path = executable_path; std::vector resolved_dylibs; - std::transform(_unresolved_dylibs.begin(), _unresolved_dylibs.end(), std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib){ - return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); - }); + std::transform(_unresolved_dylibs.begin(), _unresolved_dylibs.end(), + std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib) { + return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); + }); // Send these back to the main engine for ODR scanning processing. _derived_dependency(std::move(resolved_dylibs)); @@ -382,9 +385,9 @@ void read_macho(object_ancestry&& ancestry, file_details details, callbacks callbacks) { callbacks._do_work([_ancestry = std::move(ancestry), _s = std::move(s), - _details = std::move(details), - _callbacks = callbacks]() mutable { - std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); + _details = std::move(details), _callbacks = callbacks]() mutable { + std::uint32_t ofd_index = + static_cast(object_file_register(std::move(_ancestry), copy(_details))); macho_reader macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); if (macho._process_die_mode) { @@ -407,11 +410,12 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callbac s.seekg(entry._details._offset); - callbacks callbacks { + callbacks callbacks{ std::move(callback), }; - return macho_reader(ofd_index, std::move(s), copy(entry._details), std::move(callbacks)).dwarf(); + return macho_reader(ofd_index, std::move(s), copy(entry._details), std::move(callbacks)) + .dwarf(); } /**************************************************************************************************/ @@ -427,10 +431,7 @@ std::vector macho_derive_dylibs(const std::filesystem::pa callbacks callbacks = { register_dies_callback(), stlab::immediate_executor, // don't subdivide or reschedule sub-work during this scan. - [&_result = result](std::vector&& p){ - move_append(_result, p); - } - }; + [&_result = result](std::vector&& p) { move_append(_result, p); }}; parse_file(root_binary.string(), object_ancestry(), input, input.size(), std::move(callbacks)); diff --git a/src/main.cpp b/src/main.cpp index 8b8a504..d8c9a12 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -122,7 +122,8 @@ void process_orc_config_file(const char* bin_path_string) { app_settings._dylib_scan_mode = settings["dylib_scan_mode"].value_or(false); app_settings._parallel_processing = settings["parallel_processing"].value_or(true); app_settings._filter_redundant = settings["filter_redundant"].value_or(true); - app_settings._print_object_file_list = settings["print_object_file_list"].value_or(false); + app_settings._print_object_file_list = + settings["print_object_file_list"].value_or(false); app_settings._relative_output_file = settings["relative_output_file"].value_or(""); app_settings._resource_metrics = settings["resource_metrics"].value_or(false); @@ -144,14 +145,14 @@ void process_orc_config_file(const char* bin_path_string) { } else { // not a known value. Switch to verbose! app_settings._log_level = settings::log_level::verbose; - cout_safe([&](auto& s){ - s << "warning: Unknown log level (using verbose)\n"; - }); + cout_safe( + [&](auto& s) { s << "warning: Unknown log level (using verbose)\n"; }); } } if (app_settings._standalone_mode && app_settings._dylib_scan_mode) { - throw std::logic_error("Both standalone and dylib scanning mode are enabled. Pick one."); + throw std::logic_error( + "Both standalone and dylib scanning mode are enabled. Pick one."); } if (app_settings._dylib_scan_mode) { @@ -178,7 +179,7 @@ void process_orc_config_file(const char* bin_path_string) { if (!app_settings._violation_report.empty() && !app_settings._violation_ignore.empty()) { if (log_level_at_least(settings::log_level::warning)) { - cout_safe([&](auto& s){ + cout_safe([&](auto& s) { s << "warning: Both `violation_report` and `violation_ignore` lists found\n"; s << "warning: `violation_report` will be ignored in favor of `violation_ignore`\n"; }); @@ -187,20 +188,16 @@ void process_orc_config_file(const char* bin_path_string) { if (log_level_at_least(settings::log_level::info)) { - cout_safe([&](auto& s){ - s << "info: ORC config file: " << config_path.string() << "\n"; + cout_safe([&](auto& s) { + s << "info: ORC config file: " << config_path.string() << "\n"; }); } } catch (const toml::parse_error& err) { - cerr_safe([&](auto& s) { - s << "Parsing failed:\n" << err << "\n"; - }); + cerr_safe([&](auto& s) { s << "Parsing failed:\n" << err << "\n"; }); throw std::runtime_error("configuration parsing error"); } } else { - cerr_safe([&](auto& s){ - s << "ORC config file: not found\n"; - }); + cerr_safe([&](auto& s) { s << "ORC config file: not found\n"; }); } } @@ -254,11 +251,10 @@ struct cmdline_results { /**************************************************************************************************/ cmdline_results process_command_line(int argc, char** argv) { - cmdline_results result; if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ + cout_safe([&](auto& s) { s << "verbose: arguments:\n"; for (std::size_t i{0}; i < argc; ++i) { s << " " << argv[i] << '\n'; @@ -281,7 +277,7 @@ cmdline_results process_command_line(int argc, char** argv) { std::string_view arg = argv[i]; if (arg == "-o" || arg == "--output") { std::string filename(argv[++i]); - + if (!settings::instance()._relative_output_file.empty()) { open_output_file(filename, settings::instance()._relative_output_file); } @@ -293,16 +289,13 @@ cmdline_results process_command_line(int argc, char** argv) { if (filename.ends_with(".a")) { result._libtool_mode = true; if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ - s << "verbose: mode: libtool (by filename)\n"; - }); + cout_safe( + [&](auto& s) { s << "verbose: mode: libtool (by filename)\n"; }); } } else { result._ld_mode = true; if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ - s << "verbose: mode: ld (by filename)\n"; - }); + cout_safe([&](auto& s) { s << "verbose: mode: ld (by filename)\n"; }); } } } @@ -316,17 +309,13 @@ cmdline_results process_command_line(int argc, char** argv) { result._libtool_mode = true; assert(!result._ld_mode); if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ - s << "verbose: mode: libtool (static)\n"; - }); + cout_safe([&](auto& s) { s << "verbose: mode: libtool (static)\n"; }); } } else if (arg == "-target") { result._ld_mode = true; assert(!result._libtool_mode); if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ - s << "verbose: mode: ld (target)\n"; - }); + cout_safe([&](auto& s) { s << "verbose: mode: ld (target)\n"; }); } } else if (arg == "-lc++") { // ignore the C++ standard library @@ -389,9 +378,9 @@ auto epilogue(bool exception) { << " " << g._odrv_count << " ODRV(s) reported\n" << " " << g._object_file_count << " object file(s) processed\n" << " " << g._die_processed_count << " dies processed\n" - << " " << g._die_skipped_count << " dies skipped (" << format_pct(g._die_skipped_count, g._die_processed_count) << ")\n" - << " " << g._unique_symbol_count << " unique symbols\n" - ; + << " " << g._die_skipped_count << " dies skipped (" + << format_pct(g._die_skipped_count, g._die_processed_count) << ")\n" + << " " << g._unique_symbol_count << " unique symbols\n"; }); } @@ -400,22 +389,24 @@ auto epilogue(bool exception) { const auto total_pool_size = std::accumulate(pool_sizes.begin(), pool_sizes.end(), 0ull); const auto pool_wasted = string_pool_wasted(); const auto total_pool_wasted = std::accumulate(pool_wasted.begin(), pool_wasted.end(), 0); - const auto die_memory_footprint((g._die_processed_count - g._die_skipped_count) * sizeof(die)); + const auto die_memory_footprint((g._die_processed_count - g._die_skipped_count) * + sizeof(die)); cout_safe([&](auto& s) { s << "Resource metrics:\n" << " String pool size / waste:\n"; for (std::size_t i(0); i < string_pool_count_k; ++i) { - s << " " << i << ": " << format_size(pool_sizes[i]) << " (" << pool_sizes[i] << ") / " - << format_size(pool_wasted[i]) << " (" << pool_wasted[i] << ") / " + s << " " << i << ": " << format_size(pool_sizes[i]) << " (" << pool_sizes[i] + << ") / " << format_size(pool_wasted[i]) << " (" << pool_wasted[i] << ") / " << std::fixed << format_pct(pool_wasted[i], pool_sizes[i]) << "\n"; } s << " totals: " << format_size(total_pool_size) << " (" << total_pool_size << ") / " << format_size(total_pool_wasted) << " (" << total_pool_wasted << ") / " << format_pct(total_pool_wasted, total_pool_size) << "\n"; - s << " die footprint: " << format_size(die_memory_footprint) << " (" << die_memory_footprint << ") \n"; + s << " die footprint: " << format_size(die_memory_footprint) << " (" + << die_memory_footprint << ") \n"; }); } @@ -461,7 +452,7 @@ void maybe_forward_to_linker(int argc, char** argv, const cmdline_results& cmdli executable_path /= "libtool"; } else { if (log_level_at_least(settings::log_level::warning)) { - cout_safe([&](auto& s){ + cout_safe([&](auto& s) { s << "warning: libtool/ld mode could not be derived; forwarding to linker disabled\n"; }); } @@ -472,9 +463,8 @@ void maybe_forward_to_linker(int argc, char** argv, const cmdline_results& cmdli if (!exists(executable_path)) { throw std::runtime_error("Could not forward to linker: " + executable_path.string()); } else if (log_level_at_least(settings::log_level::verbose)) { - cout_safe([&](auto& s){ - s << "verbose: forwarding to " + executable_path.string() + "\n"; - }); + cout_safe( + [&](auto& s) { s << "verbose: forwarding to " + executable_path.string() + "\n"; }); } std::string arguments = executable_path.string(); @@ -486,9 +476,7 @@ void maybe_forward_to_linker(int argc, char** argv, const cmdline_results& cmdli // REVISIT: (fbrereto) We may need to capture/forward stderr here as well. // Also, if the execution of the linker ended in a failure, we need to bubble // that up immediately, and preempt ORC processing. - cout_safe([&](auto& s){ - s << exec(arguments.c_str()); - }); + cout_safe([&](auto& s) { s << exec(arguments.c_str()); }); } /**************************************************************************************************/ @@ -508,9 +496,7 @@ int main(int argc, char** argv) try { if (settings::instance()._print_object_file_list) { for (const auto& input_path : cmdline._file_object_list) { - cout_safe([&](auto& s){ - s << input_path.string() << '\n'; - }); + cout_safe([&](auto& s) { s << input_path.string() << '\n'; }); } return EXIT_SUCCESS; @@ -541,21 +527,18 @@ int main(int argc, char** argv) try { assert(globals::instance()._odrv_count == violations.size()); for (const auto& report : violations) { - cout_safe([&](auto& s){ - s << report; // important to NOT add the '\n', because lots of reports are empty, and it creates a lot of blank lines + cout_safe([&](auto& s) { + s << report; // important to NOT add the '\n', because lots of reports are empty, and it + // creates a lot of blank lines }); } return epilogue(false); } catch (const std::exception& error) { - cerr_safe([&](auto& s) { - s << "Fatal error: " << error.what() << '\n'; - }); + cerr_safe([&](auto& s) { s << "Fatal error: " << error.what() << '\n'; }); return epilogue(true); } catch (...) { - cerr_safe([&](auto& s) { - s << "Fatal error: unknown\n"; - }); + cerr_safe([&](auto& s) { s << "Fatal error: unknown\n"; }); return epilogue(false); } From 4b6a52f2d6402c585b382467c83128ba587ae5a5 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 11:14:55 -0700 Subject: [PATCH 11/25] reverting a change --- src/dwarf.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 6f5c9ed..7af00da 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -1200,8 +1200,7 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, const auto handle_passover = [&]() { if (fatal_attribute(attr._name)) { - throw std::runtime_error(std::string("Passing over an essential attribute (") + - to_string(attr._name) + ")"); + throw std::runtime_error(std::string("Passing over an essential attribute (") + to_string(attr._name) + ")"); } result.passover(); auto size = form_length(attr._form, _s); @@ -1520,11 +1519,9 @@ bool dwarf::implementation::skip_die(die& d, const attribute_sequence& attribute /**************************************************************************************************/ -void dwarf::implementation::report_die_processing_failure(std::size_t die_absolute_offset, - std::string&& error) { +void dwarf::implementation::report_die_processing_failure(std::size_t die_absolute_offset, std::string&& error) { if (log_level_at_least(settings::log_level::warning)) { - const auto debug_info_offset = - static_cast(die_absolute_offset - _debug_info._offset); + const auto debug_info_offset = static_cast(die_absolute_offset - _debug_info._offset); cerr_safe([&](auto& s) { s << "warning: failed to process die\n" @@ -1570,8 +1567,7 @@ void dwarf::implementation::process_all_dies() { attribute_sequence attributes; try { - std::tie(die, attributes) = - abbreviation_to_die(die_absolute_offset, process_mode::complete); + std::tie(die, attributes) = abbreviation_to_die(die_absolute_offset, process_mode::complete); } catch (const std::exception& error) { // `report_die_processing_failure` will rethrow report_die_processing_failure(die_absolute_offset, error.what()); From 9d3aa3341118c0e63015a7cf5d6f676d7ee9f63c Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 11:22:11 -0700 Subject: [PATCH 12/25] cleanup --- src/orc.cpp | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/orc.cpp b/src/orc.cpp index d915bcf..3418d48 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -284,7 +284,11 @@ void do_work(std::function f) { try { _f(); } catch (...) { - assert(!"The program is ill-structured. Catch exceptions before they hit here."); + // I changed my opinion on this: an unhandled background task exception should terminate + // the application. This mimics the behavior of an unhandled exception on the main + // thread. Now (like main thread exceptions) background task exceptions must be + // handled before they hit this point. + assert(!"unhandled background task exception"); std::terminate(); } }; @@ -298,8 +302,9 @@ void do_work(std::function f) { system([_work_token = work().working(), _doit = std::move(doit)] { #if ORC_FEATURE(TRACY) - thread_local bool tracy_set_thread_name_k = []{ - TracyCSetThreadName(orc::tracy::format_unique("worker %s", orc::tracy::unique_thread_name())); + thread_local bool tracy_set_thread_name_k = [] { + TracyCSetThreadName( + orc::tracy::format_unique("worker %s", orc::tracy::unique_thread_name())); return true; }(); (void)tracy_set_thread_name_k; @@ -443,8 +448,8 @@ die* enforce_odrv_for_die_list(die* base, std::vector& results) { static TracyLockable(std::mutex, odrv_report_mutex); { - std::lock_guard lock(odrv_report_mutex); - results.push_back(report); + std::lock_guard lock(odrv_report_mutex); + results.push_back(report); } return dies.front(); @@ -461,7 +466,8 @@ std::vector orc_process(std::vector&& file_l // dylib scan mode involves a pre-processing step where we parse the file list // and discover any dylibs those Mach-O files depend upon. for (const auto& input_path : file_list) { - do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, &_list = macho_derived_dependencies] { + do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, + &_list = macho_derived_dependencies] { auto dylibs = macho_derive_dylibs(_input_path); if (dylibs.empty()) return; std::lock_guard m(_mutex); @@ -483,16 +489,13 @@ std::vector orc_process(std::vector&& file_l for (const auto& input_path : file_list) { do_work([_input_path = input_path] { if (!exists(_input_path)) { - cerr_safe([&](auto& s) { s << "file " << _input_path.string() << " does not exist\n"; }); + cerr_safe( + [&](auto& s) { s << "file " << _input_path.string() << " does not exist\n"; }); return; } freader input(_input_path); - callbacks callbacks = { - register_dies, - do_work, - derived_dependency_callback(), - }; + callbacks callbacks = {register_dies, do_work}; parse_file(_input_path.string(), object_ancestry(), input, input.size(), std::move(callbacks)); From f365a0c62cb8386706723e7d6b60469afc3cb0c8 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 16:43:12 -0700 Subject: [PATCH 13/25] more improvements and cleaning up the code so I can do recursive dylib scanning --- include/orc/async.hpp | 30 +++++++ include/orc/macho.hpp | 5 +- include/orc/parse_file.hpp | 1 - src/async.cpp | 171 +++++++++++++++++++++++++++++++++++++ src/macho.cpp | 142 ++++++++++++++++++++++-------- src/main.cpp | 11 ++- src/orc.cpp | 144 +++---------------------------- 7 files changed, 329 insertions(+), 175 deletions(-) create mode 100644 include/orc/async.hpp create mode 100644 src/async.cpp diff --git a/include/orc/async.hpp b/include/orc/async.hpp new file mode 100644 index 0000000..b77e5cc --- /dev/null +++ b/include/orc/async.hpp @@ -0,0 +1,30 @@ +// Copyright 2024 Adobe +// All Rights Reserved. +// +// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms +// of the Adobe license agreement accompanying it. + +#pragma once + +#include + +//====================================================================================================================== + +namespace orc { + +//====================================================================================================================== + +// Enqueue a task for (possibly asynchronous) execution. If the `parallel_processing` setting in the +// ORC config file is true, the task will be enqueued for processing on a background thread pool. +// Otherwise, the task will be executed immediately in the current thread. +void do_work(std::function); + +// blocks the calling thread until all enqueued work items have completed. If the +// `parallel_processing` setting in the ORC config file is `false`, this will return immediately. +void block_on_work(); + +//====================================================================================================================== + +} // namespace orc + +//====================================================================================================================== diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index ad70883..2919752 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -15,10 +15,11 @@ /**************************************************************************************************/ template -void move_append(C& dst, C& src) { +void move_append(C& dst, C&& src) { dst.insert(dst.end(), std::move_iterator(src.begin()), std::move_iterator(src.end())); + src.clear(); } /**************************************************************************************************/ @@ -35,6 +36,6 @@ struct dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& /**************************************************************************************************/ -std::vector macho_derive_dylibs(const std::filesystem::path& root_binary); +std::vector macho_derive_dylibs(const std::vector& root_binaries); /**************************************************************************************************/ diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index 882507a..f35fe2b 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -232,7 +232,6 @@ using derived_dependency_callback = std::function +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//// stlab +//#include +//#include +//#include +//#include +// +//// toml++ +//#include +// +//// tbb +//#include +// +//// application +#include "orc/settings.hpp" +#include "orc/task_system.hpp" +#include "orc/tracy.hpp" + +/**************************************************************************************************/ + +namespace orc { + +/**************************************************************************************************/ + +namespace { + +/**************************************************************************************************/ + +struct work_counter { + struct state { + void increment() { + { + std::lock_guard lock(_m); + ++_n; + } + _c.notify_all(); + } + + void decrement() { + { + std::lock_guard lock(_m); + --_n; + } + _c.notify_all(); + } + + void wait() { + std::unique_lock lock(_m); + if (_n == 0) return; + _c.wait(lock, [&] { return _n == 0; }); + } + + std::mutex _m; + std::condition_variable _c; + std::size_t _n{0}; + }; + + using shared_state = std::shared_ptr; + + friend struct token; + +public: + work_counter() : _impl{std::make_shared()} {} + + struct token { + token(shared_state w) : _w(std::move(w)) { _w->increment(); } + token(const token& t) : _w{t._w} { _w->increment(); } + token(token&& t) = default; + ~token() { + if (_w) _w->decrement(); + } + + private: + shared_state _w; + }; + + auto working() { return token(_impl); } + + void wait() { _impl->wait(); } + +private: + shared_state _impl; + + void increment() { _impl->increment(); } + void decrement() { _impl->decrement(); } +}; + +/**************************************************************************************************/ + +auto& work() { + static work_counter _work; + return _work; +} + +/**************************************************************************************************/ + +} // namespace + +/**************************************************************************************************/ + +void do_work(std::function f) { + auto doit = [_f = std::move(f)]() { + // I changed my opinion on this: an unhandled background task exception should terminate + // the application. This mimics the behavior of an unhandled exception on the main + // thread. Now (like main thread exceptions) background task exceptions must be + // handled before they hit this point. + try { + _f(); + } catch (const std::exception& error) { + const char* what = error.what(); + (void)what; // so you can see it in the debugger. + assert(!"unhandled background task exception"); + std::terminate(); + } catch (...) { + assert(!"unknown unhandled background task exception"); + std::terminate(); + } + }; + + if (!settings::instance()._parallel_processing) { + doit(); + return; + } + + static orc::task_system system; + + system([_work_token = work().working(), _doit = std::move(doit)] { +#if ORC_FEATURE(TRACY) + thread_local bool tracy_set_thread_name_k = [] { + TracyCSetThreadName( + orc::tracy::format_unique("worker %s", orc::tracy::unique_thread_name())); + return true; + }(); + (void)tracy_set_thread_name_k; +#endif // ORC_FEATURE(TRACY) + _doit(); + }); +} + +/**************************************************************************************************/ +// It would be groovy if this routine could do some of the work, too, while it is waiting for the +// pool to finish up. +void block_on_work() { + work().wait(); +} + +/**************************************************************************************************/ + +} // namespace orc + +/**************************************************************************************************/ diff --git a/src/macho.cpp b/src/macho.cpp index d7e45d5..2cc3314 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -19,6 +19,7 @@ #include // application +#include "orc/async.hpp" #include "orc/dwarf.hpp" #include "orc/object_file_registry.hpp" #include "orc/orc.hpp" // for cerr_safe @@ -35,20 +36,23 @@ namespace { dwarf field (for die processing within `dwarf.cpp`, or 2) derive dylib dependencies enumerated in the Mach-O segments of the file being read. The "switch" for which mode the reader is in is based on the callbacks it is given. Since die scanning and dylib scanning are mutually - exclusive, the callbacks provided determine which path the `macho_reader` should take. + exclusive, the callbacks provided determine which path the `macho_reader` should take. There is + actually a third mode, which is during the ODRV reporting. In that mode we are neither scanning + for dylibs nor DIEs, but are gathering more details about DIEs that we need to report on issues. + In that case, both `_register_die_mode` and `_derive_dylib_mode` will be false. */ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, file_details&& details, callbacks&& callbacks) - : _process_die_mode(static_cast(callbacks._register_die)), + : _register_die_mode(static_cast(callbacks._register_die)), _derive_dylib_mode(static_cast(callbacks._derived_dependency)), _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), _derived_dependency(std::move(callbacks._derived_dependency)), _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { - if (_process_die_mode ^ _derive_dylib_mode) { - cerr_safe([&](auto& s) { s << "Exactly one of die or dylib scanning is allowed.\n"; }); + if (_register_die_mode && _derive_dylib_mode) { + cerr_safe([&](auto& s) { s << "Only one of die or dylib scanning is allowed.\n"; }); std::terminate(); } populate_dwarf(); @@ -63,7 +67,7 @@ struct macho_reader { void derive_dependencies(); - const bool _process_die_mode{false}; + const bool _register_die_mode{false}; const bool _derive_dylib_mode{false}; private: @@ -190,18 +194,28 @@ void macho_reader::read_lc_rpath() { See: https://sourceware.org/gdb/current/onlinedocs/stabs.html/ */ void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_offset) { - if (!_details._is_64_bit) throw std::runtime_error("Need support for non-64-bit STABs."); std::vector additional_object_files; + while (symbol_count--) { - auto entry = read_pod(_s); - if (entry.n_type != N_OSO) continue; + std::uint32_t entry_string_offset{0}; + + if (_details._is_64_bit) { + auto entry = read_pod(_s); + if (entry.n_type != N_OSO) continue; + entry_string_offset = entry.n_un.n_strx; + } else { + auto entry = read_pod(_s); + if (entry.n_type != N_OSO) continue; + entry_string_offset = entry.n_un.n_strx; + } + // TODO: Comparing the modified file time ensures the object file has not changed // since the application binary was linked. We should compare this time given to // us against the modified time of the file on-disk. // const auto modified_time = entry.n_value; std::filesystem::path path = - temp_seek(_s, _details._offset + string_offset + entry.n_un.n_strx, + temp_seek(_s, _details._offset + string_offset + entry_string_offset, [&]() { return _s.read_c_string_view(); }); // Some entries have been observed to contain the `.o` file as a parenthetical to the @@ -219,12 +233,7 @@ void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_o additional_object_files.push_back(std::move(path)); } - // don't think I need these here, as we should sort/make unique the total set - // of additional object files within `orc_process`. Saving until I'm sure. - // - // std::sort(additional_object_files.begin(), additional_object_files.end()); - // auto new_end = std::unique(additional_object_files.begin(), additional_object_files.end()); - // additional_object_files.erase(new_end, additional_object_files.end()); + _derived_dependency(std::move(additional_object_files)); } @@ -256,9 +265,7 @@ void macho_reader::read_load_command() { switch (command.cmd) { case LC_SEGMENT_64: { - if (_process_die_mode) { - read_lc_segment_64(); - } + read_lc_segment_64(); } break; case LC_LOAD_DYLIB: { if (_derive_dylib_mode) { @@ -283,10 +290,10 @@ void macho_reader::read_load_command() { /**************************************************************************************************/ -std::filesystem::path resolve_dylib(std::string raw_path, - const std::filesystem::path& executable_path, - const std::filesystem::path& loader_path, - const std::vector& rpaths) { +std::optional resolve_dylib(std::string raw_path, + const std::filesystem::path& executable_path, + const std::filesystem::path& loader_path, + const std::vector& rpaths) { constexpr std::string_view executable_path_k = "@executable_path"; constexpr std::string_view loader_path_k = "@loader_path"; constexpr std::string_view rpath_k = "@rpath"; @@ -300,13 +307,15 @@ std::filesystem::path resolve_dylib(std::string raw_path, for (const auto& rpath : rpaths) { std::string tmp = raw_path; tmp.replace(0, rpath_k.size(), rpath); - std::filesystem::path candidate = + std::optional candidate = resolve_dylib(tmp, executable_path, loader_path, rpaths); - if (exists(candidate)) { + if (candidate && exists(*candidate)) { return candidate; } } - throw std::runtime_error("Could not find dependent library: " + raw_path); + + cerr_safe([&](auto& s){ s << "Could not find dependent library: " + raw_path + "\n"; }); + return std::nullopt; } return raw_path; @@ -326,11 +335,13 @@ void macho_reader::derive_dependencies() { std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); std::filesystem::path loader_path = executable_path; + std::vector resolved_dylibs; - std::transform(_unresolved_dylibs.begin(), _unresolved_dylibs.end(), - std::back_inserter(resolved_dylibs), [&](const auto& raw_dylib) { - return resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); - }); + for (const auto& raw_dylib : _unresolved_dylibs) { + auto resolved = resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); + if (!resolved) continue; + resolved_dylibs.emplace_back(std::move(*resolved)); + } // Send these back to the main engine for ODR scanning processing. _derived_dependency(std::move(resolved_dylibs)); @@ -384,13 +395,13 @@ void read_macho(object_ancestry&& ancestry, std::istream::pos_type end_pos, file_details details, callbacks callbacks) { - callbacks._do_work([_ancestry = std::move(ancestry), _s = std::move(s), + orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _callbacks = callbacks]() mutable { std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); macho_reader macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); - if (macho._process_die_mode) { + if (macho._register_die_mode) { ++globals::instance()._object_file_count; macho.dwarf().process_all_dies(); } else if (macho._derive_dylib_mode) { @@ -420,22 +431,79 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callbac /**************************************************************************************************/ -std::vector macho_derive_dylibs(const std::filesystem::path& root_binary) { +namespace { + +/**************************************************************************************************/ + +std::vector make_sorted_unique(std::vector&& files) { + // eliminate duplicate object files, if any. The discovered order of these things shouldn't + // matter for the purposes of additional dylib scans or the ODR scan at the end. + std::sort(files.begin(), files.end()); + auto new_end = std::unique(files.begin(), files.end()); + files.erase(new_end, files.end()); + return files; +} + +/**************************************************************************************************/ +// We have to assume that dependencies will circle back on themselves at some point, so we must have +// a criteria to know when we are "done". The way we do this is to check the size of the file list +// on every iteration. The moment the resulting list stops growing, we know we're done. +#if 0 +std::vector recursive_dylib_scan(std::vector&& files) { + std::vector result = files; + std::vector last_pass = std::move(files); + + while (true) { + last_pass = macho_derive_dylibs(last_pass); + const auto last_result_size = result.size(); + move_append(result, copy(last_pass)); + result = make_sorted_unique(std::move(result)); + if (result.size() == last_result_size) break; + } + + return result; +} +#endif +/**************************************************************************************************/ + +void macho_derive_dylibs(const std::filesystem::path& root_binary, + std::mutex& mutex, + std::vector& result) { if (!exists(root_binary)) { cerr_safe([&](auto& s) { s << "file " << root_binary.string() << " does not exist\n"; }); - return std::vector(); + return; } freader input(root_binary); - std::vector result; callbacks callbacks = { register_dies_callback(), - stlab::immediate_executor, // don't subdivide or reschedule sub-work during this scan. - [&_result = result](std::vector&& p) { move_append(_result, p); }}; + [&_mutex = mutex, &_result = result](std::vector&& p) { + if (p.empty()) return; + std::lock_guard m(_mutex); + move_append(_result, std::move(p)); + } + }; parse_file(root_binary.string(), object_ancestry(), input, input.size(), std::move(callbacks)); +} - return result; +/**************************************************************************************************/ + +} // namespace + +/**************************************************************************************************/ + +std::vector macho_derive_dylibs(const std::vector& root_binaries) { + std::mutex result_mutex; + std::vector result = root_binaries; + + for (const auto& input_path : root_binaries) { + macho_derive_dylibs(input_path, result_mutex, result); + } + + orc::block_on_work(); + + return make_sorted_unique(std::move(result)); } /**************************************************************************************************/ diff --git a/src/main.cpp b/src/main.cpp index d8c9a12..ad3e77a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -122,8 +122,7 @@ void process_orc_config_file(const char* bin_path_string) { app_settings._dylib_scan_mode = settings["dylib_scan_mode"].value_or(false); app_settings._parallel_processing = settings["parallel_processing"].value_or(true); app_settings._filter_redundant = settings["filter_redundant"].value_or(true); - app_settings._print_object_file_list = - settings["print_object_file_list"].value_or(false); + app_settings._print_object_file_list = settings["print_object_file_list"].value_or(false); app_settings._relative_output_file = settings["relative_output_file"].value_or(""); app_settings._resource_metrics = settings["resource_metrics"].value_or(false); @@ -266,6 +265,14 @@ cmdline_results process_command_line(int argc, char** argv) { for (std::size_t i{1}; i < argc; ++i) { result._file_object_list.push_back(argv[i]); } + + if (settings::instance()._dylib_scan_mode && + result._file_object_list.size() != 1 && + log_level_at_least(settings::log_level::warning)) { + cout_safe([&](auto& s) { + s << "warning: dylib scanning with more than one top-level artifact may yield false positives.\n"; + }); + } } else { std::vector library_search_paths; std::vector framework_search_paths; diff --git a/src/orc.cpp b/src/orc.cpp index 3418d48..7d1dee0 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -32,6 +32,7 @@ #include // application +#include "orc/async.hpp" #include "orc/dwarf.hpp" #include "orc/features.hpp" #include "orc/macho.hpp" @@ -41,7 +42,6 @@ #include "orc/settings.hpp" #include "orc/str.hpp" #include "orc/string_pool.hpp" -#include "orc/task_system.hpp" #include "orc/tracy.hpp" /**************************************************************************************************/ @@ -211,110 +211,6 @@ struct cmdline_results { /**************************************************************************************************/ -struct work_counter { - struct state { - void increment() { - { - std::lock_guard lock(_m); - ++_n; - } - _c.notify_all(); - } - - void decrement() { - { - std::lock_guard lock(_m); - --_n; - } - _c.notify_all(); - } - - void wait() { - std::unique_lock lock(_m); - if (_n == 0) return; - _c.wait(lock, [&] { return _n == 0; }); - } - - std::mutex _m; - std::condition_variable _c; - std::size_t _n{0}; - }; - - using shared_state = std::shared_ptr; - - friend struct token; - -public: - work_counter() : _impl{std::make_shared()} {} - - struct token { - token(shared_state w) : _w(std::move(w)) { _w->increment(); } - token(const token& t) : _w{t._w} { _w->increment(); } - token(token&& t) = default; - ~token() { - if (_w) _w->decrement(); - } - - private: - shared_state _w; - }; - - auto working() { return token(_impl); } - - void wait() { _impl->wait(); } - -private: - shared_state _impl; - - void increment() { _impl->increment(); } - void decrement() { _impl->decrement(); } -}; - -/**************************************************************************************************/ - -auto& work() { - static work_counter _work; - return _work; -} - -/**************************************************************************************************/ - -void do_work(std::function f) { - auto doit = [_f = std::move(f)]() { - try { - _f(); - } catch (...) { - // I changed my opinion on this: an unhandled background task exception should terminate - // the application. This mimics the behavior of an unhandled exception on the main - // thread. Now (like main thread exceptions) background task exceptions must be - // handled before they hit this point. - assert(!"unhandled background task exception"); - std::terminate(); - } - }; - - if (!settings::instance()._parallel_processing) { - doit(); - return; - } - - static orc::task_system system; - - system([_work_token = work().working(), _doit = std::move(doit)] { -#if ORC_FEATURE(TRACY) - thread_local bool tracy_set_thread_name_k = [] { - TracyCSetThreadName( - orc::tracy::format_unique("worker %s", orc::tracy::unique_thread_name())); - return true; - }(); - (void)tracy_set_thread_name_k; -#endif // ORC_FEATURE(TRACY) - _doit(); - }); -} - -/**************************************************************************************************/ - const char* problem_prefix() { return settings::instance()._graceful_exit ? "warning" : "error"; } /**************************************************************************************************/ @@ -458,36 +354,19 @@ die* enforce_odrv_for_die_list(die* base, std::vector& results) { /**************************************************************************************************/ std::vector orc_process(std::vector&& file_list) { - std::mutex macho_derived_dependencies_mutex; - std::vector macho_derived_dependencies; - // First stage: (optional) dependency/dylib preprocessing if (settings::instance()._dylib_scan_mode) { // dylib scan mode involves a pre-processing step where we parse the file list - // and discover any dylibs those Mach-O files depend upon. - for (const auto& input_path : file_list) { - do_work([_input_path = input_path, &_mutex = macho_derived_dependencies_mutex, - &_list = macho_derived_dependencies] { - auto dylibs = macho_derive_dylibs(_input_path); - if (dylibs.empty()) return; - std::lock_guard m(_mutex); - move_append(_list, dylibs); - }); - } + // and discover any dylibs those Mach-O files depend upon. Note that we're + // glomming all these dependencies together, so if there are multiple + // files in `file_list`, we could be "finding" ODRVs across independent + // artifact+dylib groups that really do not exist. + file_list = macho_derive_dylibs(std::move(file_list)); } - work().wait(); - - move_append(file_list, macho_derived_dependencies); - - // eliminate duplicate object files, if any - std::sort(file_list.begin(), file_list.end()); - auto new_end = std::unique(file_list.begin(), file_list.end()); - file_list.erase(new_end, file_list.end()); - // Second stage: process all the DIEs for (const auto& input_path : file_list) { - do_work([_input_path = input_path] { + orc::do_work([_input_path = input_path] { if (!exists(_input_path)) { cerr_safe( [&](auto& s) { s << "file " << _input_path.string() << " does not exist\n"; }); @@ -495,14 +374,13 @@ std::vector orc_process(std::vector&& file_l } freader input(_input_path); - callbacks callbacks = {register_dies, do_work}; parse_file(_input_path.string(), object_ancestry(), input, input.size(), - std::move(callbacks)); + callbacks{register_dies}); }); } - work().wait(); + orc::block_on_work(); // Third stage: review DIEs for ODRVs std::vector result; @@ -526,7 +404,7 @@ std::vector orc_process(std::vector&& file_l // The last one could be up to (chunk_count - 1) smaller. const auto next_chunk_size = std::min(chunk_size, work_size - cur_work); const auto last = std::next(first, next_chunk_size); - do_work([_first = first, _last = last, &result]() mutable { + orc::do_work([_first = first, _last = last, &result]() mutable { for (; _first != _last; ++_first) { _first->second = enforce_odrv_for_die_list(_first->second, result); } @@ -535,7 +413,7 @@ std::vector orc_process(std::vector&& file_l first = last; } - work().wait(); + orc::block_on_work(); // Sort the ordrv_report std::sort(result.begin(), result.end(), From 7ca396bd838da0936aeabf26bd0ce01e4d5662ed Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 16:48:04 -0700 Subject: [PATCH 14/25] more improvements and cleaning up the code so I can do recursive dylib scanning --- include/orc/macho.hpp | 10 ---------- include/orc/parse_file.hpp | 1 - src/async.cpp | 25 ++----------------------- src/macho.cpp | 10 ++++++++++ 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index 2919752..bb8efc7 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -14,16 +14,6 @@ /**************************************************************************************************/ -template -void move_append(C& dst, C&& src) { - dst.insert(dst.end(), - std::move_iterator(src.begin()), - std::move_iterator(src.end())); - src.clear(); -} - -/**************************************************************************************************/ - void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index f35fe2b..a71ff77 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -227,7 +227,6 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ /**************************************************************************************************/ using register_dies_callback = std::function; -using do_work_callback = std::function)>; using derived_dependency_callback = std::function&&)>; struct callbacks { diff --git a/src/async.cpp b/src/async.cpp index 0098e16..9822a2a 100644 --- a/src/async.cpp +++ b/src/async.cpp @@ -9,29 +9,8 @@ // stdc++ #include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//// stlab -//#include -//#include -//#include -//#include -// -//// toml++ -//#include -// -//// tbb -//#include -// -//// application + +// application #include "orc/settings.hpp" #include "orc/task_system.hpp" #include "orc/tracy.hpp" diff --git a/src/macho.cpp b/src/macho.cpp index 2cc3314..f0f1ff2 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -435,6 +435,16 @@ namespace { /**************************************************************************************************/ +template +void move_append(C& dst, C&& src) { + dst.insert(dst.end(), + std::move_iterator(src.begin()), + std::move_iterator(src.end())); + src.clear(); +} + +/**************************************************************************************************/ + std::vector make_sorted_unique(std::vector&& files) { // eliminate duplicate object files, if any. The discovered order of these things shouldn't // matter for the purposes of additional dylib scans or the ODR scan at the end. From d050908753b475870f3e3fae42f9a3f0d2580826 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Thu, 2 May 2024 16:55:42 -0700 Subject: [PATCH 15/25] more improvements and cleaning up the code so I can do recursive dylib scanning --- src/macho.cpp | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/macho.cpp b/src/macho.cpp index f0f1ff2..145e14e 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -15,9 +15,6 @@ // tbb #include -// stlab -#include - // application #include "orc/async.hpp" #include "orc/dwarf.hpp" @@ -38,8 +35,8 @@ namespace { based on the callbacks it is given. Since die scanning and dylib scanning are mutually exclusive, the callbacks provided determine which path the `macho_reader` should take. There is actually a third mode, which is during the ODRV reporting. In that mode we are neither scanning - for dylibs nor DIEs, but are gathering more details about DIEs that we need to report on issues. - In that case, both `_register_die_mode` and `_derive_dylib_mode` will be false. + for dylibs nor DIEs, but are gathering more details about DIEs that we need to report on. In + that case, both `_register_die_mode` and `_derive_dylib_mode` will be `false`. */ struct macho_reader { macho_reader(std::uint32_t ofd_index, @@ -314,7 +311,7 @@ std::optional resolve_dylib(std::string raw_path, } } - cerr_safe([&](auto& s){ s << "Could not find dependent library: " + raw_path + "\n"; }); + cerr_safe([&](auto& s) { s << "Could not find dependent library: " + raw_path + "\n"; }); return std::nullopt; } @@ -332,6 +329,8 @@ void macho_reader::derive_dependencies() { // If so, that means we'll need to track the originating file and use it as // `executable_path`, and then `loader_path` will follow wherever the nesting goes. +#warning `executable_path` somehow needs to make its way from `macho_derive_dylibs` to here. + std::filesystem::path executable_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); std::filesystem::path loader_path = executable_path; @@ -395,8 +394,8 @@ void read_macho(object_ancestry&& ancestry, std::istream::pos_type end_pos, file_details details, callbacks callbacks) { - orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), - _details = std::move(details), _callbacks = callbacks]() mutable { + orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), + _callbacks = callbacks]() mutable { std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); macho_reader macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); @@ -437,9 +436,7 @@ namespace { template void move_append(C& dst, C&& src) { - dst.insert(dst.end(), - std::move_iterator(src.begin()), - std::move_iterator(src.end())); + dst.insert(dst.end(), std::move_iterator(src.begin()), std::move_iterator(src.end())); src.clear(); } @@ -476,25 +473,25 @@ std::vector recursive_dylib_scan(std::vector& result) { - if (!exists(root_binary)) { - cerr_safe([&](auto& s) { s << "file " << root_binary.string() << " does not exist\n"; }); + if (!exists(executable_path)) { + cerr_safe( + [&](auto& s) { s << "file " << executable_path.string() << " does not exist\n"; }); return; } - freader input(root_binary); - callbacks callbacks = { - register_dies_callback(), - [&_mutex = mutex, &_result = result](std::vector&& p) { - if (p.empty()) return; - std::lock_guard m(_mutex); - move_append(_result, std::move(p)); - } - }; + freader input(executable_path); + callbacks callbacks = {register_dies_callback(), [&_mutex = mutex, &_result = result]( + std::vector&& p) { + if (p.empty()) return; + std::lock_guard m(_mutex); + move_append(_result, std::move(p)); + }}; - parse_file(root_binary.string(), object_ancestry(), input, input.size(), std::move(callbacks)); + parse_file(executable_path.string(), object_ancestry(), input, input.size(), + std::move(callbacks)); } /**************************************************************************************************/ @@ -503,7 +500,8 @@ void macho_derive_dylibs(const std::filesystem::path& root_binary, /**************************************************************************************************/ -std::vector macho_derive_dylibs(const std::vector& root_binaries) { +std::vector macho_derive_dylibs( + const std::vector& root_binaries) { std::mutex result_mutex; std::vector result = root_binaries; From 2978d1cd66558430aacff1ebdabc8b03063e3522 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Fri, 3 May 2024 08:01:16 -0700 Subject: [PATCH 16/25] fix up `orc_test` --- test/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/main.cpp b/test/src/main.cpp index 09cfdb0..253740c 100644 --- a/test/src/main.cpp +++ b/test/src/main.cpp @@ -454,7 +454,7 @@ void run_battery_test(const std::filesystem::path& home) { auto object_files = compile_compilation_units(home, settings, compilation_units); orc_reset(); - auto reports = orc_process(object_files); + auto reports = orc_process(std::move(object_files)); console() << "ODRVs expected: " << expected_odrvs.size() << "; reported: " << reports.size() << '\n'; From c6a11148eb55434b81c10b534122dc16e3d2c74e Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Fri, 3 May 2024 08:29:23 -0700 Subject: [PATCH 17/25] replacing callbacks with a `macho_params` block --- include/orc/ar.hpp | 2 +- include/orc/dwarf.hpp | 3 +- include/orc/fat.hpp | 2 +- include/orc/macho.hpp | 4 +- include/orc/orc.hpp | 9 ++- include/orc/parse_file.hpp | 19 ++++-- src/ar.cpp | 4 +- src/dwarf.cpp | 14 ++--- src/fat.cpp | 4 +- src/macho.cpp | 106 ++++++++++++++------------------ src/orc.cpp | 122 ++++++++++++++++++++----------------- src/parse_file.cpp | 8 +-- 12 files changed, 151 insertions(+), 146 deletions(-) diff --git a/include/orc/ar.hpp b/include/orc/ar.hpp index 088ffe2..c6e5c56 100644 --- a/include/orc/ar.hpp +++ b/include/orc/ar.hpp @@ -18,6 +18,6 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks); + macho_params params); /**************************************************************************************************/ diff --git a/include/orc/dwarf.hpp b/include/orc/dwarf.hpp index bfea404..8cce89c 100644 --- a/include/orc/dwarf.hpp +++ b/include/orc/dwarf.hpp @@ -20,8 +20,7 @@ using die_pair = std::tuple; struct dwarf { dwarf(std::uint32_t ofd_index, freader&& s, - file_details&& details, - register_dies_callback&& callback); + file_details&& details); void register_section(std::string name, std::size_t offset, std::size_t size); diff --git a/include/orc/fat.hpp b/include/orc/fat.hpp index cc48ca6..d148fdb 100644 --- a/include/orc/fat.hpp +++ b/include/orc/fat.hpp @@ -18,6 +18,6 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks); + macho_params params); /**************************************************************************************************/ diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index bb8efc7..21245da 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -18,11 +18,11 @@ void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks); + macho_params params); /**************************************************************************************************/ -struct dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callback); +struct dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params); /**************************************************************************************************/ diff --git a/include/orc/orc.hpp b/include/orc/orc.hpp index 900c768..ce86457 100644 --- a/include/orc/orc.hpp +++ b/include/orc/orc.hpp @@ -48,6 +48,12 @@ bool filter_report(const odrv_report& report); std::vector orc_process(std::vector&&); +namespace orc { + +void register_dies(dies die_vector); + +} // namespace orc + void orc_reset(); // The returned char* is good until the next call to demangle() on the same thread. @@ -55,7 +61,6 @@ const char* demangle(const char* x); /**************************************************************************************************/ - std::mutex& ostream_safe_mutex(); template @@ -76,3 +81,5 @@ template void cerr_safe(F&& f) { ostream_safe(std::cerr, std::forward(f)); } + +/**************************************************************************************************/ diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index a71ff77..1556f15 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -226,18 +226,25 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ /**************************************************************************************************/ -using register_dies_callback = std::function; -using derived_dependency_callback = std::function&&)>; +enum class macho_reader_mode { + invalid, + register_dies, + derive_dylibs, + odrv_reporting, +}; + +struct macho_params { + using register_dependencies_callback = std::function&&)>; -struct callbacks { - register_dies_callback _register_die; - derived_dependency_callback _derived_dependency; + macho_reader_mode _mode{macho_reader_mode::invalid}; + std::filesystem::path _executable_path; // only required if mode == derive_dylibs + register_dependencies_callback _register_dependencies; // only required if mode == derive_dylibs }; void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - callbacks callbacks); + macho_params params); /**************************************************************************************************/ diff --git a/src/ar.cpp b/src/ar.cpp index 1628b38..983982d 100644 --- a/src/ar.cpp +++ b/src/ar.cpp @@ -33,7 +33,7 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks) { + macho_params params) { std::string magic = read_fixed_string<8>(s); assert(magic == "!\n"); @@ -56,7 +56,7 @@ void read_ar(object_ancestry&& ancestry, if (identifier.rfind(".o") == identifier.size() - 2) { auto end_pos = s.tellg() + static_cast(file_size); - parse_file(identifier, ancestry, s, end_pos, callbacks); + parse_file(identifier, ancestry, s, end_pos, params); s.seekg(end_pos); // parse_file could leave the read head anywhere. } else { // skip to next file in the archive. diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 7af00da..350506f 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -400,10 +400,8 @@ enum class process_mode { struct dwarf::implementation { implementation(std::uint32_t ofd_index, freader&& s, - file_details&& details, - register_dies_callback&& callback) - : _s(std::move(s)), _details(std::move(details)), _register_dies(std::move(callback)), - _ofd_index(ofd_index) {} + file_details&& details) + : _s(std::move(s)), _details(std::move(details)), _ofd_index(ofd_index) {} void register_section(const std::string& name, std::size_t offset, std::size_t size); @@ -459,7 +457,6 @@ struct dwarf::implementation { freader _s; file_details _details; - register_dies_callback _register_dies; std::vector _abbreviations; std::vector _path; std::vector _decl_files; @@ -1644,7 +1641,7 @@ void dwarf::implementation::process_all_dies() { dies.shrink_to_fit(); - _register_dies(std::move(dies)); + orc::register_dies(std::move(dies)); } /**************************************************************************************************/ @@ -1725,9 +1722,8 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t debug_info_offset, dwarf::dwarf(std::uint32_t ofd_index, freader&& s, - file_details&& details, - register_dies_callback&& callback) - : _impl(new implementation(ofd_index, std::move(s), std::move(details), std::move(callback)), + file_details&& details) + : _impl(new implementation(ofd_index, std::move(s), std::move(details)), [](auto x) { delete x; }) {} void dwarf::register_section(std::string name, std::size_t offset, std::size_t size) { diff --git a/src/fat.cpp b/src/fat.cpp index 94b6aae..8e5620c 100644 --- a/src/fat.cpp +++ b/src/fat.cpp @@ -44,7 +44,7 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks) { + macho_params params) { auto header = read_pod(s); if (details._needs_byteswap) { endian_swap(header.magic); @@ -88,7 +88,7 @@ void read_fat(object_ancestry&& ancestry, temp_seek(s, offset, [&] { parse_file(cputype_to_string(cputype), ancestry, s, - s.tellg() + static_cast(size), callbacks); + s.tellg() + static_cast(size), params); }); } } diff --git a/src/macho.cpp b/src/macho.cpp index 145e14e..fdfb8e2 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -42,14 +42,12 @@ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, file_details&& details, - callbacks&& callbacks) - : _register_die_mode(static_cast(callbacks._register_die)), - _derive_dylib_mode(static_cast(callbacks._derived_dependency)), - _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), - _derived_dependency(std::move(callbacks._derived_dependency)), - _dwarf(ofd_index, copy(_s), copy(_details), std::move(callbacks._register_die)) { - if (_register_die_mode && _derive_dylib_mode) { - cerr_safe([&](auto& s) { s << "Only one of die or dylib scanning is allowed.\n"; }); + macho_params&& params) + : _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), + _params(std::move(params)), + _dwarf(ofd_index, copy(_s), copy(_details)) { + if (params._mode == macho_reader_mode::invalid) { + cerr_safe([&](auto& s) { s << "Invalid reader mode.\n"; }); std::terminate(); } populate_dwarf(); @@ -62,10 +60,11 @@ struct macho_reader { return std::move(_dwarf); } - void derive_dependencies(); + bool register_dies_mode() const { return _params._mode == macho_reader_mode::register_dies; } + bool derive_dylibs_mode() const { return _params._mode == macho_reader_mode::derive_dylibs; } + // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; } - const bool _register_die_mode{false}; - const bool _derive_dylib_mode{false}; + void derive_dependencies(); private: void populate_dwarf(); @@ -80,7 +79,7 @@ struct macho_reader { const std::uint32_t _ofd_index{0}; freader _s; const file_details _details; - derived_dependency_callback _derived_dependency; + const macho_params _params; std::vector _unresolved_dylibs; std::vector _rpaths; struct dwarf _dwarf; // must be last @@ -231,7 +230,7 @@ void macho_reader::read_stabs(std::uint32_t symbol_count, std::uint32_t string_o additional_object_files.push_back(std::move(path)); } - _derived_dependency(std::move(additional_object_files)); + _params._register_dependencies(std::move(additional_object_files)); } void macho_reader::read_lc_symtab() { @@ -260,28 +259,19 @@ void macho_reader::read_load_command() { return command; }); - switch (command.cmd) { - case LC_SEGMENT_64: { - read_lc_segment_64(); - } break; - case LC_LOAD_DYLIB: { - if (_derive_dylib_mode) { - read_lc_load_dylib(); - } - } break; - case LC_RPATH: { - if (_derive_dylib_mode) { - read_lc_rpath(); - } - } break; - case LC_SYMTAB: { - if (_derive_dylib_mode) { - read_lc_symtab(); - } - } break; - default: { - _s.seekg(command.cmdsize, std::ios::cur); - } break; + if (derive_dylibs_mode()) { + switch (command.cmd) { + case LC_SEGMENT_64: { read_lc_segment_64(); } break; + case LC_LOAD_DYLIB: { read_lc_load_dylib(); } break; + case LC_RPATH: { read_lc_rpath(); } break; + case LC_SYMTAB: { read_lc_symtab(); } break; + default: { _s.seekg(command.cmdsize, std::ios::cur); } break; + } + } else { + switch (command.cmd) { + case LC_SEGMENT_64: { read_lc_segment_64(); } break; + default: { _s.seekg(command.cmdsize, std::ios::cur); } break; + } } } @@ -329,11 +319,11 @@ void macho_reader::derive_dependencies() { // If so, that means we'll need to track the originating file and use it as // `executable_path`, and then `loader_path` will follow wherever the nesting goes. -#warning `executable_path` somehow needs to make its way from `macho_derive_dylibs` to here. - - std::filesystem::path executable_path = + const std::filesystem::path loader_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); - std::filesystem::path loader_path = executable_path; + +#warning `executable_path` somehow needs to make its way from `macho_derive_dylibs` to here. + const std::filesystem::path executable_path = loader_path; std::vector resolved_dylibs; for (const auto& raw_dylib : _unresolved_dylibs) { @@ -343,7 +333,7 @@ void macho_reader::derive_dependencies() { } // Send these back to the main engine for ODR scanning processing. - _derived_dependency(std::move(resolved_dylibs)); + _params._register_dependencies(std::move(resolved_dylibs)); } /**************************************************************************************************/ @@ -393,17 +383,17 @@ void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - callbacks callbacks) { + macho_params params) { orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), - _callbacks = callbacks]() mutable { + _params = std::move(params)]() mutable { std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); - macho_reader macho(ofd_index, std::move(_s), std::move(_details), copy(_callbacks)); + macho_reader macho(ofd_index, std::move(_s), std::move(_details), std::move(_params)); - if (macho._register_die_mode) { + if (macho.register_dies_mode()) { ++globals::instance()._object_file_count; macho.dwarf().process_all_dies(); - } else if (macho._derive_dylib_mode) { + } else if (macho.derive_dylibs_mode()) { macho.derive_dependencies(); } else { // If we're here, Something Bad has happened. @@ -414,18 +404,13 @@ void read_macho(object_ancestry&& ancestry, /**************************************************************************************************/ -dwarf dwarf_from_macho(std::uint32_t ofd_index, register_dies_callback&& callback) { +dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params) { const auto& entry = object_file_fetch(ofd_index); freader s(entry._ancestry.begin()->allocate_path()); s.seekg(entry._details._offset); - callbacks callbacks{ - std::move(callback), - }; - - return macho_reader(ofd_index, std::move(s), copy(entry._details), std::move(callbacks)) - .dwarf(); + return macho_reader(ofd_index, std::move(s), copy(entry._details), std::move(params)).dwarf(); } /**************************************************************************************************/ @@ -483,15 +468,18 @@ void macho_derive_dylibs(const std::filesystem::path& executable_path, } freader input(executable_path); - callbacks callbacks = {register_dies_callback(), [&_mutex = mutex, &_result = result]( - std::vector&& p) { - if (p.empty()) return; - std::lock_guard m(_mutex); - move_append(_result, std::move(p)); - }}; + macho_params params; + params._mode = macho_reader_mode::derive_dylibs; + params._executable_path = executable_path; + params._register_dependencies = + [&_mutex = mutex, &_result = result](std::vector&& p) { + if (p.empty()) return; + std::lock_guard m(_mutex); + move_append(_result, std::move(p)); + }; parse_file(executable_path.string(), object_ancestry(), input, input.size(), - std::move(callbacks)); + std::move(params)); } /**************************************************************************************************/ diff --git a/src/orc.cpp b/src/orc.cpp index 7d1dee0..9f3cb0e 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -148,61 +148,6 @@ auto& global_die_map() { /**************************************************************************************************/ -void register_dies(dies die_vector) { - ZoneScoped; - - globals::instance()._die_processed_count += die_vector.size(); - - // pre-process the vector of dies by partitioning them into those that are skippable and those - // that are not. Then, we erase the skippable ones and shrink the vector to fit, which will - // cause a reallocation and copying of only the necessary dies into a vector whose memory - // consumption is exactly what's needed. - - auto unskipped_end = - std::partition(die_vector.begin(), die_vector.end(), std::not_fn(&die::_skippable)); - - std::size_t skip_count = std::distance(unskipped_end, die_vector.end()); - - die_vector.erase(unskipped_end, die_vector.end()); - die_vector.shrink_to_fit(); - - // This is a list so the die vectors don't move about. The dies become pretty entangled as they - // point to one another by reference, and the odr_map itself stores const pointers to the dies - // it registers. Thus, we move our incoming die_vector to the end of this list, and all the - // pointers we use will stay valid for the lifetime of the application. - dies& dies = *with_global_die_collection([&](auto& collection) { - collection.push_back(std::move(die_vector)); - return --collection.end(); - }); - - for (auto& d : dies) { - assert(!d._skippable); - - // - // At this point we know we're going to register the die. Hereafter belongs - // work exclusive to DIEs getting registered/odr-enforced. - // - - auto result = global_die_map().insert(std::make_pair(d._hash, &d)); - if (result.second) { - ++globals::instance()._unique_symbol_count; - continue; - } - - constexpr auto mutex_count_k = 67; // prime; to help reduce any hash bias - static std::mutex mutexes_s[mutex_count_k]; - std::lock_guard lock(mutexes_s[d._hash % mutex_count_k]); - - die& d_in_map = *result.first->second; - d._next_die = d_in_map._next_die; - d_in_map._next_die = &d; - } - - globals::instance()._die_skipped_count += skip_count; -} - -/**************************************************************************************************/ - struct cmdline_results { std::vector _file_object_list; bool _ld_mode{false}; @@ -218,7 +163,7 @@ const char* problem_prefix() { return settings::instance()._graceful_exit ? "war attribute_sequence fetch_attributes_for_die(const die& d) { ZoneScoped; - auto dwarf = dwarf_from_macho(d._ofd_index, register_dies_callback()); + auto dwarf = dwarf_from_macho(d._ofd_index, macho_params{ macho_reader_mode::odrv_reporting }); auto [die, attributes] = dwarf.fetch_one_die(d._debug_info_offset, d._cu_die_address); assert(die._tag == d._tag); @@ -376,7 +321,7 @@ std::vector orc_process(std::vector&& file_l freader input(_input_path); parse_file(_input_path.string(), object_ancestry(), input, input.size(), - callbacks{register_dies}); + macho_params{macho_reader_mode::register_dies}); }); } @@ -424,6 +369,69 @@ std::vector orc_process(std::vector&& file_l /**************************************************************************************************/ +namespace orc { + +/**************************************************************************************************/ + +void register_dies(dies die_vector) { + ZoneScoped; + + globals::instance()._die_processed_count += die_vector.size(); + + // pre-process the vector of dies by partitioning them into those that are skippable and those + // that are not. Then, we erase the skippable ones and shrink the vector to fit, which will + // cause a reallocation and copying of only the necessary dies into a vector whose memory + // consumption is exactly what's needed. + + auto unskipped_end = + std::partition(die_vector.begin(), die_vector.end(), std::not_fn(&die::_skippable)); + + std::size_t skip_count = std::distance(unskipped_end, die_vector.end()); + + die_vector.erase(unskipped_end, die_vector.end()); + die_vector.shrink_to_fit(); + + // This is a list so the die vectors don't move about. The dies become pretty entangled as they + // point to one another by reference, and the odr_map itself stores const pointers to the dies + // it registers. Thus, we move our incoming die_vector to the end of this list, and all the + // pointers we use will stay valid for the lifetime of the application. + dies& dies = *with_global_die_collection([&](auto& collection) { + collection.push_back(std::move(die_vector)); + return --collection.end(); + }); + + for (auto& d : dies) { + assert(!d._skippable); + + // + // At this point we know we're going to register the die. Hereafter belongs + // work exclusive to DIEs getting registered/odr-enforced. + // + + auto result = global_die_map().insert(std::make_pair(d._hash, &d)); + if (result.second) { + ++globals::instance()._unique_symbol_count; + continue; + } + + constexpr auto mutex_count_k = 67; // prime; to help reduce any hash bias + static std::mutex mutexes_s[mutex_count_k]; + std::lock_guard lock(mutexes_s[d._hash % mutex_count_k]); + + die& d_in_map = *result.first->second; + d._next_die = d_in_map._next_die; + d_in_map._next_die = &d; + } + + globals::instance()._die_skipped_count += skip_count; +} + +/**************************************************************************************************/ + +} // namespace orc + +/**************************************************************************************************/ + void orc_reset() { global_die_map().clear(); with_global_die_collection([](auto& collection) { collection.clear(); }); diff --git a/src/parse_file.cpp b/src/parse_file.cpp index 3d4a9a1..f637208 100644 --- a/src/parse_file.cpp +++ b/src/parse_file.cpp @@ -157,7 +157,7 @@ void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - callbacks callbacks) { + macho_params params) { auto detection = detect_file(s); // append this object name to the ancestry @@ -169,13 +169,13 @@ void parse_file(std::string_view object_name, throw std::runtime_error("unknown format"); case file_details::format::macho: return read_macho(std::move(new_ancestry), s, end_pos, std::move(detection), - std::move(callbacks)); + std::move(params)); case file_details::format::ar: return read_ar(std::move(new_ancestry), s, end_pos, std::move(detection), - std::move(callbacks)); + std::move(params)); case file_details::format::fat: return read_fat(std::move(new_ancestry), s, end_pos, std::move(detection), - std::move(callbacks)); + std::move(params)); } } From 5907afbc3e2d0cbe9a25a34e12887e3d83852413 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Fri, 3 May 2024 08:41:19 -0700 Subject: [PATCH 18/25] got the right `executable_path` wired up during dylib resolution --- src/macho.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/macho.cpp b/src/macho.cpp index fdfb8e2..333f2e8 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -322,12 +322,9 @@ void macho_reader::derive_dependencies() { const std::filesystem::path loader_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); -#warning `executable_path` somehow needs to make its way from `macho_derive_dylibs` to here. - const std::filesystem::path executable_path = loader_path; - std::vector resolved_dylibs; for (const auto& raw_dylib : _unresolved_dylibs) { - auto resolved = resolve_dylib(raw_dylib, executable_path, loader_path, _rpaths); + auto resolved = resolve_dylib(raw_dylib, _params._executable_path, loader_path, _rpaths); if (!resolved) continue; resolved_dylibs.emplace_back(std::move(*resolved)); } @@ -470,7 +467,7 @@ void macho_derive_dylibs(const std::filesystem::path& executable_path, freader input(executable_path); macho_params params; params._mode = macho_reader_mode::derive_dylibs; - params._executable_path = executable_path; + params._executable_path = executable_path.parent_path(); params._register_dependencies = [&_mutex = mutex, &_result = result](std::vector&& p) { if (p.empty()) return; From e6ae50f34f482389914b89255eb20b330ac3c4e2 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Fri, 3 May 2024 12:48:09 -0700 Subject: [PATCH 19/25] recursive dependency derivation is working --- src/macho.cpp | 149 ++++++++++++++++++++++++++++++++++---------------- src/orc.cpp | 9 ++- 2 files changed, 109 insertions(+), 49 deletions(-) diff --git a/src/macho.cpp b/src/macho.cpp index 333f2e8..a875261 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -44,8 +44,7 @@ struct macho_reader { file_details&& details, macho_params&& params) : _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), - _params(std::move(params)), - _dwarf(ofd_index, copy(_s), copy(_details)) { + _params(std::move(params)), _dwarf(ofd_index, copy(_s), copy(_details)) { if (params._mode == macho_reader_mode::invalid) { cerr_safe([&](auto& s) { s << "Invalid reader mode.\n"; }); std::terminate(); @@ -62,7 +61,8 @@ struct macho_reader { bool register_dies_mode() const { return _params._mode == macho_reader_mode::register_dies; } bool derive_dylibs_mode() const { return _params._mode == macho_reader_mode::derive_dylibs; } - // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; } + // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; + // } void derive_dependencies(); @@ -259,6 +259,7 @@ void macho_reader::read_load_command() { return command; }); + // clang-format off if (derive_dylibs_mode()) { switch (command.cmd) { case LC_SEGMENT_64: { read_lc_segment_64(); } break; @@ -273,6 +274,7 @@ void macho_reader::read_load_command() { default: { _s.seekg(command.cmdsize, std::ios::cur); } break; } } + // clang-format on } /**************************************************************************************************/ @@ -301,7 +303,11 @@ std::optional resolve_dylib(std::string raw_path, } } - cerr_safe([&](auto& s) { s << "Could not find dependent library: " + raw_path + "\n"; }); + if (log_level_at_least(settings::log_level::verbose)) { + cerr_safe( + [&](auto& s) { s << "Could not find dependent library: " + raw_path + "\n"; }); + } + return std::nullopt; } @@ -415,11 +421,15 @@ dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params) { namespace { /**************************************************************************************************/ - +// Append `src` to the end of `dst` by destructively moving the items out of `src`. +// Preconditions: +// - `dst` and `src` are not the same container. +// Postconditions: +// - All elements in `src` will be moved-from. +// - `src` itself will still be valid, even if the elements within it are not. template void move_append(C& dst, C&& src) { dst.insert(dst.end(), std::move_iterator(src.begin()), std::move_iterator(src.end())); - src.clear(); } /**************************************************************************************************/ @@ -433,50 +443,93 @@ std::vector make_sorted_unique(std::vector derive_immediate_dylibs( + const std::filesystem::path& executable_path, const std::filesystem::path& input_path) { + // `input_path` is not `loader_path` because it points to the binary to be scanned. + // `loader_path` should be the directory that contains `input_path`. + if (!exists(input_path)) { + if (log_level_at_least(settings::log_level::verbose)) { + cerr_safe([&](auto& s) { + s << "verbose: file " << input_path.string() << " does not exist\n"; + }); + } + return std::vector(); + } + + std::mutex result_mutex; + std::vector result; + freader input(input_path); + macho_params params; + params._mode = macho_reader_mode::derive_dylibs; + params._executable_path = executable_path; + params._register_dependencies = [&](std::vector&& p) { + if (p.empty()) return; + std::lock_guard m(result_mutex); + move_append(result, std::move(p)); + }; + + parse_file(input_path.string(), object_ancestry(), input, input.size(), std::move(params)); + + orc::block_on_work(); + + return result; +} + /**************************************************************************************************/ // We have to assume that dependencies will circle back on themselves at some point, so we must have // a criteria to know when we are "done". The way we do this is to check the size of the file list // on every iteration. The moment the resulting list stops growing, we know we're done. -#if 0 -std::vector recursive_dylib_scan(std::vector&& files) { - std::vector result = files; - std::vector last_pass = std::move(files); +std::vector derive_all_dylibs(const std::filesystem::path& binary) { + const auto executable_path = binary.parent_path(); + std::vector result; + std::vector last_pass = derive_immediate_dylibs(executable_path, binary); + auto prev_result_size = result.size(); + + if (log_level_at_least(settings::log_level::info)) { + cout_safe([&](auto& s) { + s << "info: scanning for dependencies of " << binary.filename() << "\n"; + }); + } while (true) { - last_pass = macho_derive_dylibs(last_pass); - const auto last_result_size = result.size(); + // store a copy of last_pass in the result and get the new size. move_append(result, copy(last_pass)); result = make_sorted_unique(std::move(result)); - if (result.size() == last_result_size) break; - } - return result; -} -#endif -/**************************************************************************************************/ + // If the size of the result hasn't changed with the appending + // of the latest set of derived dependencies, then we know we + // have found all of them, and can stop. + const auto cur_result_size = result.size(); + if (cur_result_size == prev_result_size) break; + + if (log_level_at_least(settings::log_level::info)) { + cout_safe([&](auto& s) { + const auto count = cur_result_size - prev_result_size; + s << "info: found " << count << " more dependencies...\n"; + }); + } + + prev_result_size = cur_result_size; -void macho_derive_dylibs(const std::filesystem::path& executable_path, - std::mutex& mutex, - std::vector& result) { - if (!exists(executable_path)) { - cerr_safe( - [&](auto& s) { s << "file " << executable_path.string() << " does not exist\n"; }); - return; + // Otherwise, gather a new set of dependencies from those in last_pass + // and set them to `last_pass` for the next round. + std::vector next_pass; + for (const auto& dependency : last_pass) { + move_append(next_pass, derive_immediate_dylibs(executable_path, dependency)); + } + last_pass = make_sorted_unique(std::move(next_pass)); } - freader input(executable_path); - macho_params params; - params._mode = macho_reader_mode::derive_dylibs; - params._executable_path = executable_path.parent_path(); - params._register_dependencies = - [&_mutex = mutex, &_result = result](std::vector&& p) { - if (p.empty()) return; - std::lock_guard m(_mutex); - move_append(_result, std::move(p)); - }; - - parse_file(executable_path.string(), object_ancestry(), input, input.size(), - std::move(params)); + if (log_level_at_least(settings::log_level::info)) { + cout_safe( + [&](auto& s) { s << "info: found " << result.size() << " total dependencies\n"; }); + } + + return result; } /**************************************************************************************************/ @@ -486,16 +539,20 @@ void macho_derive_dylibs(const std::filesystem::path& executable_path, /**************************************************************************************************/ std::vector macho_derive_dylibs( - const std::vector& root_binaries) { - std::mutex result_mutex; - std::vector result = root_binaries; - - for (const auto& input_path : root_binaries) { - macho_derive_dylibs(input_path, result_mutex, result); + const std::vector& binaries) { + std::vector result = binaries; + + // For the purpose of the executable_path/loader_path relationships, we treat each binary + // as independent of the others. That is, each root binary will be the `executable_path` for + // its tree of dependencies. + // + // Then, yes, we smash them all together and treat them as one large binary with all its + // dependencies. Otherwise we'd have to add a way to conduct multiple ORC scans per session, + // which the app is not set up to do. We did warn the user we would do this, though. + for (const auto& binary : binaries) { + move_append(result, derive_all_dylibs(binary)); } - orc::block_on_work(); - return make_sorted_unique(std::move(result)); } diff --git a/src/orc.cpp b/src/orc.cpp index 9f3cb0e..667adaa 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -163,7 +163,7 @@ const char* problem_prefix() { return settings::instance()._graceful_exit ? "war attribute_sequence fetch_attributes_for_die(const die& d) { ZoneScoped; - auto dwarf = dwarf_from_macho(d._ofd_index, macho_params{ macho_reader_mode::odrv_reporting }); + auto dwarf = dwarf_from_macho(d._ofd_index, macho_params{macho_reader_mode::odrv_reporting}); auto [die, attributes] = dwarf.fetch_one_die(d._debug_info_offset, d._cu_die_address); assert(die._tag == d._tag); @@ -313,8 +313,11 @@ std::vector orc_process(std::vector&& file_l for (const auto& input_path : file_list) { orc::do_work([_input_path = input_path] { if (!exists(_input_path)) { - cerr_safe( - [&](auto& s) { s << "file " << _input_path.string() << " does not exist\n"; }); + if (log_level_at_least(settings::log_level::verbose)) { + cerr_safe([&](auto& s) { + s << "file " << _input_path.string() << " does not exist\n"; + }); + } return; } From e60946b721b9d3da2102be55a6cba138eb6e7ba0 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 7 May 2024 14:13:58 -0700 Subject: [PATCH 20/25] tweak --- src/macho.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/macho.cpp b/src/macho.cpp index a875261..c020940 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -321,9 +321,6 @@ void macho_reader::derive_dependencies() { // `@executable_path` resolves to the path of the directory containing the executable. // `@loader_path` resolves to the path of the client doing the loading. // For executables, `@loader_path` and `@executable_path` mean the same thing. - // TODO: (fosterbrereton) We're going to have to nest this search, aren't we? - // If so, that means we'll need to track the originating file and use it as - // `executable_path`, and then `loader_path` will follow wherever the nesting goes. const std::filesystem::path loader_path = object_file_ancestry(_ofd_index)._ancestors[0].allocate_path().parent_path(); @@ -335,7 +332,7 @@ void macho_reader::derive_dependencies() { resolved_dylibs.emplace_back(std::move(*resolved)); } - // Send these back to the main engine for ODR scanning processing. + // Send these new-found dependencies to the main engine for ODR scanning. _params._register_dependencies(std::move(resolved_dylibs)); } From e4dd201b5708bce367ee1e299510755097980c96 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 14 May 2024 11:46:05 -0700 Subject: [PATCH 21/25] performance improvements --- include/orc/tracy.hpp | 8 ++-- src/ar.cpp | 4 +- src/async.cpp | 3 +- src/macho.cpp | 88 +++++++++++++++++++++++++------------------ src/main.cpp | 2 +- src/orc.cpp | 3 ++ src/tracy.cpp | 4 +- 7 files changed, 68 insertions(+), 44 deletions(-) diff --git a/include/orc/tracy.hpp b/include/orc/tracy.hpp index e8f5b67..7ef6168 100644 --- a/include/orc/tracy.hpp +++ b/include/orc/tracy.hpp @@ -32,8 +32,10 @@ #endif //================================================================================================== - -namespace orc::tracy { +// This used to be orc::tracy, but then the compiler would complain about some of the Tracy macros +// failing to resolve to symbols in this nested namespace. It failed to find the global ::tracy +// namespace where they actually live. This has been renamed `profiler` to avoid the collision. +namespace orc::profiler { //================================================================================================== // returns a unique `const char*` per thread for the lifetime of the application. A _brief_ name, @@ -57,6 +59,6 @@ void initialize(); //================================================================================================== -} // namespace orc::tracy +} // namespace orc::profiler //================================================================================================== diff --git a/src/ar.cpp b/src/ar.cpp index 983982d..706fc62 100644 --- a/src/ar.cpp +++ b/src/ar.cpp @@ -9,6 +9,7 @@ // application #include "orc/str.hpp" +#include "orc/tracy.hpp" /**************************************************************************************************/ @@ -37,7 +38,8 @@ void read_ar(object_ancestry&& ancestry, std::string magic = read_fixed_string<8>(s); assert(magic == "!\n"); - // REVISIT: (fbrereto) opportunity to parallelize here. + // The .o files are stored serially within the ar file, so I am not sure how easily this can be + // parallelized (if at all) while (s.tellg() < end_pos) { std::string identifier = rstrip(read_fixed_string<16>(s)); std::string timestamp = rstrip(read_fixed_string<12>(s)); diff --git a/src/async.cpp b/src/async.cpp index 9822a2a..e866b63 100644 --- a/src/async.cpp +++ b/src/async.cpp @@ -127,7 +127,7 @@ void do_work(std::function f) { #if ORC_FEATURE(TRACY) thread_local bool tracy_set_thread_name_k = [] { TracyCSetThreadName( - orc::tracy::format_unique("worker %s", orc::tracy::unique_thread_name())); + orc::profiler::format_unique("worker %s", orc::profiler::unique_thread_name())); return true; }(); (void)tracy_set_thread_name_k; @@ -140,6 +140,7 @@ void do_work(std::function f) { // It would be groovy if this routine could do some of the work, too, while it is waiting for the // pool to finish up. void block_on_work() { + TracyMessageL("orc::block_on_work"); work().wait(); } diff --git a/src/macho.cpp b/src/macho.cpp index 3a5088b..b5c5539 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -62,8 +62,7 @@ struct macho_reader { bool register_dies_mode() const { return _params._mode == macho_reader_mode::register_dies; } bool derive_dylibs_mode() const { return _params._mode == macho_reader_mode::derive_dylibs; } - // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; - // } + // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; } void derive_dependencies(); @@ -387,7 +386,13 @@ void read_macho(object_ancestry&& ancestry, macho_params params) { orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _params = std::move(params)]() mutable { - ZoneScoped; + ZoneScopedN("read_macho"); +#if ORC_FEATURE(TRACY) + std::stringstream ss; + ss << _ancestry; + const auto path_annot = std::move(ss).str(); + ZoneText(path_annot.c_str(), path_annot.size()); +#endif // ORC_FEATURE(TRACY) std::uint32_t ofd_index = static_cast(object_file_register(std::move(_ancestry), copy(_details))); @@ -449,8 +454,10 @@ std::vector make_sorted_unique(std::vector derive_immediate_dylibs( const std::filesystem::path& executable_path, const std::filesystem::path& input_path) { + ZoneScoped; + // `input_path` is not `loader_path` because it points to the binary to be scanned. - // `loader_path` should be the directory that contains `input_path`. + // Therefore, `loader_path` should be the directory that contains `input_path`. if (!exists(input_path)) { if (log_level_at_least(settings::log_level::verbose)) { cerr_safe([&](auto& s) { @@ -460,15 +467,21 @@ std::vector derive_immediate_dylibs( return std::vector(); } - std::mutex result_mutex; +#if ORC_FEATURE(TRACY) + const auto path_annot = input_path.string(); + ZoneText(path_annot.c_str(), path_annot.size()); +#endif // ORC_FEATURE(TRACY) + + TracyLockable(std::mutex, dylib_result_mutex); std::vector result; freader input(input_path); macho_params params; params._mode = macho_reader_mode::derive_dylibs; params._executable_path = executable_path; params._register_dependencies = [&](std::vector&& p) { + ZoneScopedN("register_dependencies"); if (p.empty()) return; - std::lock_guard m(result_mutex); + std::lock_guard m(dylib_result_mutex); move_append(result, std::move(p)); }; @@ -476,18 +489,17 @@ std::vector derive_immediate_dylibs( orc::block_on_work(); - return result; + return make_sorted_unique(std::move(result)); } /**************************************************************************************************/ -// We have to assume that dependencies will circle back on themselves at some point, so we must have -// a criteria to know when we are "done". The way we do this is to check the size of the file list -// on every iteration. The moment the resulting list stops growing, we know we're done. + std::vector derive_all_dylibs(const std::filesystem::path& binary) { + ZoneScoped; + const auto executable_path = binary.parent_path(); - std::vector result; - std::vector last_pass = derive_immediate_dylibs(executable_path, binary); - auto prev_result_size = result.size(); + std::vector scanned; + std::vector pass(1, binary); if (log_level_at_least(settings::log_level::info)) { cout_safe([&](auto& s) { @@ -496,40 +508,44 @@ std::vector derive_all_dylibs(const std::filesystem::path } while (true) { - // store a copy of last_pass in the result and get the new size. - move_append(result, copy(last_pass)); - result = make_sorted_unique(std::move(result)); + std::vector pass_dependencies; + + for (const auto& dependency : pass) { + move_append(pass_dependencies, derive_immediate_dylibs(executable_path, dependency)); + } - // If the size of the result hasn't changed with the appending - // of the latest set of derived dependencies, then we know we - // have found all of them, and can stop. - const auto cur_result_size = result.size(); - if (cur_result_size == prev_result_size) break; + // The set of binaries scanned in this pass get appended to `scanned` + move_append(scanned, std::move(pass)); + scanned = make_sorted_unique(std::move(scanned)); + + // clean up the set of dependencies found in this pass. + pass_dependencies = make_sorted_unique(std::move(pass_dependencies)); + + // for the _next_ pass, we only want to scan files that are new, + // those that are in `pass_dependencies` and _not_ in `scanned`. + // If that set of files is empty, then we have found all our + // dependencies, and can stop. + pass = std::vector(); // ensure `pass` is valid and empty. + std::set_difference(pass_dependencies.begin(), pass_dependencies.end(), + scanned.begin(), scanned.end(), std::back_inserter(pass)); + + if (pass.empty()) { + break; + } if (log_level_at_least(settings::log_level::info)) { cout_safe([&](auto& s) { - const auto count = cur_result_size - prev_result_size; - s << "info: found " << count << " more dependencies...\n"; + s << "info: scanning " << pass.size() << " more dependencies...\n"; }); } - - prev_result_size = cur_result_size; - - // Otherwise, gather a new set of dependencies from those in last_pass - // and set them to `last_pass` for the next round. - std::vector next_pass; - for (const auto& dependency : last_pass) { - move_append(next_pass, derive_immediate_dylibs(executable_path, dependency)); - } - last_pass = make_sorted_unique(std::move(next_pass)); } if (log_level_at_least(settings::log_level::info)) { cout_safe( - [&](auto& s) { s << "info: found " << result.size() << " total dependencies\n"; }); + [&](auto& s) { s << "info: found " << scanned.size() << " total dependencies\n"; }); } - return result; + return scanned; } /**************************************************************************************************/ @@ -540,7 +556,7 @@ std::vector derive_all_dylibs(const std::filesystem::path std::vector macho_derive_dylibs( const std::vector& binaries) { - std::vector result = binaries; + std::vector result; // For the purpose of the executable_path/loader_path relationships, we treat each binary // as independent of the others. That is, each root binary will be the `executable_path` for diff --git a/src/main.cpp b/src/main.cpp index 11a94f2..752c298 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -493,7 +493,7 @@ void maybe_forward_to_linker(int argc, char** argv, const cmdline_results& cmdli /**************************************************************************************************/ int main(int argc, char** argv) try { - orc::tracy::initialize(); + orc::profiler::initialize(); signal(SIGINT, interrupt_callback_handler); diff --git a/src/orc.cpp b/src/orc.cpp index b80b7e5..357f945 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -419,6 +419,9 @@ die* enforce_odrv_for_die_list(die* base, std::vector& results) { odrv_report report{path_to_symbol(base->_path.view()), dies[0]}; + // This can be very contentious for large projects. We may want to think + // about a more efficient way to collect these reports per-thread, then + // bubble them up once they've all been collected. static TracyLockable(std::mutex, odrv_report_mutex); { std::lock_guard lock(odrv_report_mutex); diff --git a/src/tracy.cpp b/src/tracy.cpp index 40e5a5c..3f5ce96 100644 --- a/src/tracy.cpp +++ b/src/tracy.cpp @@ -14,7 +14,7 @@ //================================================================================================== -namespace orc::tracy { +namespace orc::profiler { //================================================================================================== @@ -82,6 +82,6 @@ void initialize() { //================================================================================================== -} // namespace orc::tracy +} // namespace orc::profiler //================================================================================================== From 58415352e9c3a15076672596ae6492e01f11895c Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 14 May 2024 11:49:52 -0700 Subject: [PATCH 22/25] review change from @leethomason --- src/async.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/async.cpp b/src/async.cpp index e866b63..9c81ca9 100644 --- a/src/async.cpp +++ b/src/async.cpp @@ -58,7 +58,6 @@ struct work_counter { friend struct token; -public: work_counter() : _impl{std::make_shared()} {} struct token { From a4987e7090b467b10d49e933fff9cf365f3df94a Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 14 May 2024 11:53:37 -0700 Subject: [PATCH 23/25] review change from @leethomason --- src/async.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/async.cpp b/src/async.cpp index 9c81ca9..0e3c81e 100644 --- a/src/async.cpp +++ b/src/async.cpp @@ -54,14 +54,12 @@ struct work_counter { std::size_t _n{0}; }; - using shared_state = std::shared_ptr; - friend struct token; work_counter() : _impl{std::make_shared()} {} struct token { - token(shared_state w) : _w(std::move(w)) { _w->increment(); } + token(std::shared_ptr w) : _w(std::move(w)) { _w->increment(); } token(const token& t) : _w{t._w} { _w->increment(); } token(token&& t) = default; ~token() { @@ -69,7 +67,7 @@ struct work_counter { } private: - shared_state _w; + std::shared_ptr _w; }; auto working() { return token(_impl); } @@ -77,7 +75,7 @@ struct work_counter { void wait() { _impl->wait(); } private: - shared_state _impl; + std::shared_ptr _impl; void increment() { _impl->increment(); } void decrement() { _impl->decrement(); } From 2c1d2106c325f9d9df9da0bc4359adbdba4c144d Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 14 May 2024 12:02:08 -0700 Subject: [PATCH 24/25] Yet another build break --- test/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/main.cpp b/test/src/main.cpp index 93cd1f8..e095310 100644 --- a/test/src/main.cpp +++ b/test/src/main.cpp @@ -532,7 +532,7 @@ void traverse_directory_tree(std::filesystem::path& directory) { /**************************************************************************************************/ int main(int argc, char** argv) try { - orc::tracy::initialize(); + orc::profiler::initialize(); if (argc < 2) { console_error() << "Usage: " << argv[0] << " /path/to/test/battery/\n"; From 654d789c951dc9df584c9112a8a9abd15474936e Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Tue, 14 May 2024 12:57:21 -0700 Subject: [PATCH 25/25] typo --- src/dwarf_structs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dwarf_structs.cpp b/src/dwarf_structs.cpp index 3b98f2d..0316fe6 100644 --- a/src/dwarf_structs.cpp +++ b/src/dwarf_structs.cpp @@ -100,7 +100,7 @@ std::ostream& operator<<(std::ostream& s, const attribute& x) { /**************************************************************************************************/ std::ostream& operator<<(std::ostream& s, const location& x) { - return s << " " << x.file << ": " << x.loc; + return s << " " << x.file << ":" << x.loc; } std::optional derive_definition_location(const attribute_sequence& x) {