From 940b49b78dca24d1d1225b76a733753915dc7dd2 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Wed, 10 Jan 2024 10:51:40 -0800 Subject: [PATCH] adding debug_str section pre-loading before processing all dies. --- src/dwarf.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/dwarf.cpp b/src/dwarf.cpp index f98be63..6125fdf 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -408,6 +408,7 @@ struct dwarf::implementation { std::uint32_t read_uleb(); std::int32_t read_sleb(); + void read_debug_strings(); void read_abbreviations(); void read_lines(); const abbrev& find_abbreviation(std::uint32_t code) const; @@ -490,6 +491,25 @@ void dwarf::implementation::register_section(const std::string& name, /**************************************************************************************************/ +void dwarf::implementation::read_debug_strings() { + ZoneScoped; + + // Go through the whole of __debug_str, pre-loading every string into the string pools. + temp_seek(_s, _debug_str._offset, [&]{ + const auto section_end = _debug_str._offset + _debug_str._size; + while (true) { + auto offset = _s.tellg(); + if (offset == section_end) { + break; + } + offset -= _debug_str._offset; + _debug_str_cache[offset] = empool(_s.read_c_string_view()); + } + }); +} + +/**************************************************************************************************/ + void dwarf::implementation::read_abbreviations() { ZoneScoped; @@ -565,10 +585,6 @@ pool_string dwarf::implementation::read_debug_str(std::size_t offset) { }; #endif // ORC_FEATURE(DEBUG_STR_CACHE) - // I tried an implementation that loaded the whole debug_str section into the string pool on - // the first debug string read. The big problem with that technique is that the single die - // processing mode becomes very expensive, as it only needs a handful of debug strings but - // ends up loading all of them. 
Perhaps we could pivot the technique based on the process mode? if (const auto found = _debug_str_cache.find(offset); found != _debug_str_cache.end()) { #if ORC_FEATURE(DEBUG_STR_CACHE) ++hit_s; @@ -1364,6 +1380,15 @@ void dwarf::implementation::process_all_dies() { if (!_ready && !register_sections_done()) return; assert(_ready); + // This call to `read_debug_strings` should *not* be part of register_sections_done. I tried an + // implementation that always loaded the whole debug_str section into the string pool on the + // first debug string read. The big problem with that technique is that the single die + // processing mode becomes very expensive, as it only needs a handful of debug strings but ends + // up loading all of them. This is a middle ground where the debug strings are empooled when + // processing all dies, but for the fetch-single-die phase, it pays the cost for the bespoke + // debug strings it needs. + read_debug_strings(); + auto section_begin = _debug_info._offset; auto section_end = section_begin + _debug_info._size;