From edbcc6db7a50b6435e10d5aa8989f9f899c5a9f9 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:09:06 -0400 Subject: [PATCH 01/62] first batch of change --- components/core/src/clp/DictionaryReader.hpp | 88 +++++++++++-------- components/core/src/clp/DictionaryWriter.hpp | 8 +- components/core/src/clp/FileReader.cpp | 67 +++----------- components/core/src/clp/FileReader.hpp | 24 +---- components/core/src/clp/Utils.cpp | 60 +++++-------- components/core/src/clp/clp/compression.cpp | 59 ++++++------- components/core/src/clp/clp/decompression.cpp | 4 - components/core/src/clp/dictionary_utils.cpp | 4 - components/core/src/clp/dictionary_utils.hpp | 2 - .../clp/streaming_archive/reader/Archive.cpp | 9 +- .../clp/streaming_archive/reader/Archive.hpp | 1 - 11 files changed, 120 insertions(+), 206 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 0499e50eb..5b87920fa 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -54,11 +54,6 @@ class DictionaryReader { */ void close(); - /** - * Reads any new entries from disk - */ - void read_new_entries(); - /** * Gets the dictionary's entries * @return All dictionary entries @@ -103,21 +98,20 @@ class DictionaryReader { /** * Reads a segment's worth of IDs from the segment index */ - void read_segment_ids(); + void read_segment_ids(streaming_compression::Decompressor& segment_index_decompressor); + + /** + * Reads any new entries from disk + */ + void read_new_entries( + FileReader& dictionary_file_reader, + streaming_compression::Decompressor& dictionary_decompressor, + FileReader& segment_index_file_reader, + streaming_compression::Decompressor& segment_index_decompressor + ); // Variables bool m_is_open; - FileReader m_dictionary_file_reader; - FileReader m_segment_index_file_reader; -#if USE_PASSTHROUGH_COMPRESSION - streaming_compression::passthrough::Decompressor m_dictionary_decompressor; - streaming_compression::passthrough::Decompressor m_segment_index_decompressor; -#elif USE_ZSTD_COMPRESSION - streaming_compression::zstd::Decompressor m_dictionary_decompressor; - streaming_compression::zstd::Decompressor m_segment_index_decompressor; -#else - static_assert(false, "Unsupported compression mode."); -#endif size_t m_num_segments_read_from_index; std::vector m_entries; }; @@ -133,17 +127,35 @@ void DictionaryReader::open( constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB + FileReader dictionary_file_reader(dictionary_path); + FileReader segment_index_file_reader(segment_index_path); + +#if USE_PASSTHROUGH_COMPRESSION + streaming_compression::passthrough::Decompressor m_dictionary_decompressor; + streaming_compression::passthrough::Decompressor m_segment_index_decompressor; +#elif USE_ZSTD_COMPRESSION + streaming_compression::zstd::Decompressor dictionary_decompressor; + streaming_compression::zstd::Decompressor segment_index_decompressor; +#else + static_assert(false, "Unsupported compression mode."); +#endif + open_dictionary_for_reading( - dictionary_path, - segment_index_path, cDecompressorFileReadBufferCapacity, - m_dictionary_file_reader, - m_dictionary_decompressor, - m_segment_index_file_reader, - m_segment_index_decompressor + dictionary_file_reader, + dictionary_decompressor, + segment_index_file_reader, + segment_index_decompressor ); m_is_open = true; + + read_new_entries( + dictionary_file_reader, + dictionary_decompressor, + segment_index_file_reader, + segment_index_decompressor + ); } template @@ -152,11 +164,6 @@ void DictionaryReader::close() { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } - m_segment_index_decompressor.close(); - m_segment_index_file_reader.close(); - m_dictionary_decompressor.close(); - m_dictionary_file_reader.close(); - m_num_segments_read_from_index = 0; m_entries.clear(); @@ -164,13 +171,18 @@ void DictionaryReader::close() { } template -void DictionaryReader::read_new_entries() { +void DictionaryReader::read_new_entries( + FileReader& dictionary_file_reader, + streaming_compression::Decompressor& dictionary_decompressor, + FileReader& segment_index_file_reader, + streaming_compression::Decompressor& segment_index_decompressor +) { if (false == m_is_open) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } // Read dictionary header - auto num_dictionary_entries = read_dictionary_header(m_dictionary_file_reader); + auto num_dictionary_entries = read_dictionary_header(dictionary_file_reader); // Validate dictionary header if (num_dictionary_entries < m_entries.size()) { @@ -185,12 +197,12 @@ void DictionaryReader::read_new_entries() { for (size_t i = prev_num_dictionary_entries; i < num_dictionary_entries; ++i) { auto& entry = m_entries[i]; - entry.read_from_file(m_dictionary_decompressor); + entry.read_from_file(dictionary_decompressor); } } // Read segment index header - auto num_segments = read_segment_index_header(m_segment_index_file_reader); + auto num_segments = read_segment_index_header(segment_index_file_reader); // Validate segment index header if (num_segments < m_num_segments_read_from_index) { @@ -200,7 +212,7 @@ void DictionaryReader::read_new_entries() { // Read new segments from index if (num_segments > m_num_segments_read_from_index) { for (size_t i = m_num_segments_read_from_index; i < num_segments; ++i) { - read_segment_ids(); + read_segment_ids(segment_index_decompressor); } } } @@ -269,15 +281,17 @@ void DictionaryReader::get_entries_matching_wildcar } template -void DictionaryReader::read_segment_ids() { +void DictionaryReader::read_segment_ids( + streaming_compression::Decompressor& segment_index_decompressor +) { segment_id_t segment_id; - m_segment_index_decompressor.read_numeric_value(segment_id, false); + segment_index_decompressor.read_numeric_value(segment_id, false); uint64_t num_ids; - m_segment_index_decompressor.read_numeric_value(num_ids, false); + segment_index_decompressor.read_numeric_value(num_ids, false); for (uint64_t i = 0; i < num_ids; ++i) { DictionaryIdType id; - m_segment_index_decompressor.read_numeric_value(id, false); + segment_index_decompressor.read_numeric_value(id, false); if (id >= m_entries.size()) { throw OperationFailed(ErrorCode_Corrupt, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index e9b6f623c..0d1d27d72 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -203,8 +203,8 @@ void DictionaryWriter::open_and_preload( m_max_id = max_id; - FileReader dictionary_file_reader; - FileReader segment_index_file_reader; + FileReader dictionary_file_reader(dictionary_path); + FileReader segment_index_file_reader(segment_index_path); #if USE_PASSTHROUGH_COMPRESSION streaming_compression::passthrough::Decompressor dictionary_decompressor; streaming_compression::passthrough::Decompressor segment_index_decompressor; @@ -216,8 +216,6 @@ void DictionaryWriter::open_and_preload( #endif constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB open_dictionary_for_reading( - dictionary_path, - segment_index_path, cDecompressorFileReadBufferCapacity, dictionary_file_reader, dictionary_decompressor, @@ -249,9 +247,7 @@ void DictionaryWriter::open_and_preload( m_next_id = num_dictionary_entries; segment_index_decompressor.close(); - segment_index_file_reader.close(); dictionary_decompressor.close(); - dictionary_file_reader.close(); m_dictionary_file_writer.open( dictionary_path, diff --git a/components/core/src/clp/FileReader.cpp b/components/core/src/clp/FileReader.cpp index 06a986383..a91ef5e77 100644 --- a/components/core/src/clp/FileReader.cpp +++ b/components/core/src/clp/FileReader.cpp @@ -12,15 +12,24 @@ using std::string; namespace clp { +FileReader::FileReader(string const& path) : m_file{nullptr}, m_getdelim_buf_len(0), m_getdelim_buf(nullptr) { + m_file = fopen(path.c_str(), "rb"); + if (nullptr == m_file) { + if (ENOENT == errno) { + throw OperationFailed(ErrorCode_FileNotFound, __FILE__, __LINE__); + } + throw OperationFailed(ErrorCode_errno, __FILE__, __LINE__); + } + m_path = path; +} FileReader::~FileReader() { - close(); + // NOTE: We don't check errors for fclose since it seems the only reason it could fail is if + // it was interrupted by a signal + fclose(m_file); free(m_getdelim_buf); } ErrorCode FileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { - if (nullptr == m_file) { - return ErrorCode_NotInit; - } if (nullptr == buf) { return ErrorCode_BadParam; } @@ -40,10 +49,6 @@ ErrorCode FileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_ } ErrorCode FileReader::try_seek_from_begin(size_t pos) { - if (nullptr == m_file) { - return ErrorCode_NotInit; - } - int retval = fseeko(m_file, pos, SEEK_SET); if (0 != retval) { return ErrorCode_errno; @@ -53,10 +58,6 @@ ErrorCode FileReader::try_seek_from_begin(size_t pos) { } ErrorCode FileReader::try_get_pos(size_t& pos) { - if (nullptr == m_file) { - return ErrorCode_NotInit; - } - pos = ftello(m_file); if ((off_t)-1 == pos) { return ErrorCode_errno; @@ -65,46 +66,8 @@ ErrorCode FileReader::try_get_pos(size_t& pos) { return ErrorCode_Success; } -ErrorCode FileReader::try_open(string const& path) { - // Cleanup in case caller forgot to call close before calling this function - close(); - - m_file = fopen(path.c_str(), "rb"); - if (nullptr == m_file) { - if (ENOENT == errno) { - return ErrorCode_FileNotFound; - } - return ErrorCode_errno; - } - m_path = path; - - return ErrorCode_Success; -} - -void FileReader::open(string const& path) { - ErrorCode error_code = try_open(path); - if (ErrorCode_Success != error_code) { - if (ErrorCode_FileNotFound == error_code) { - throw "File not found: " + boost::filesystem::weakly_canonical(path).string() + "\n"; - } else { - throw OperationFailed(error_code, __FILENAME__, __LINE__); - } - } -} - -void FileReader::close() { - if (m_file != nullptr) { - // NOTE: We don't check errors for fclose since it seems the only reason it could fail is if - // it was interrupted by a signal - fclose(m_file); - m_file = nullptr; - } -} - ErrorCode FileReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, string& str) { - assert(nullptr != m_file); - if (false == append) { str.clear(); } @@ -125,10 +88,6 @@ FileReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, } ErrorCode FileReader::try_fstat(struct stat& stat_buffer) { - if (nullptr == m_file) { - throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); - } - auto return_value = fstat(fileno(m_file), &stat_buffer); if (0 != return_value) { return ErrorCode_errno; diff --git a/components/core/src/clp/FileReader.hpp b/components/core/src/clp/FileReader.hpp index 56e376af6..7bee3efe9 100644 --- a/components/core/src/clp/FileReader.hpp +++ b/components/core/src/clp/FileReader.hpp @@ -25,7 +25,7 @@ class FileReader : public ReaderInterface { char const* what() const noexcept override { return "FileReader operation failed"; } }; - FileReader() : m_file(nullptr), m_getdelim_buf_len(0), m_getdelim_buf(nullptr) {} + FileReader(std::string const& path); ~FileReader(); @@ -73,28 +73,6 @@ class FileReader : public ReaderInterface { ErrorCode try_read_to_delimiter(char delim, bool keep_delimiter, bool append, std::string& str) override; - // Methods - bool is_open() const { return m_file != nullptr; } - - /** - * Tries to open a file - * @param path - * @return ErrorCode_Success on success - * @return ErrorCode_FileNotFound if the file was not found - * @return ErrorCode_errno otherwise - */ - ErrorCode try_open(std::string const& path); - /** - * Opens a file - * @param path - * @throw FileReader::OperationFailed on failure - */ - void open(std::string const& path); - /** - * Closes the file if it's open - */ - void close(); - [[nodiscard]] std::string const& get_path() const { return m_path; } /** diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp index 1a45c5bf9..866ae840f 100644 --- a/components/core/src/clp/Utils.cpp +++ b/components/core/src/clp/Utils.cpp @@ -137,14 +137,11 @@ string get_unambiguous_path(string const& path) { } ErrorCode read_list_of_paths(string const& list_path, vector& paths) { - FileReader file_reader; - ErrorCode error_code = file_reader.try_open(list_path); - if (ErrorCode_Success != error_code) { - return error_code; - } + FileReader file_reader(list_path); // Read file string line; + ErrorCode error_code; while (true) { error_code = file_reader.try_read_to_delimiter('\n', false, false, line); if (ErrorCode_Success != error_code) { @@ -160,8 +157,6 @@ ErrorCode read_list_of_paths(string const& list_path, vector& paths) { return error_code; } - file_reader.close(); - return ErrorCode_Success; } @@ -262,38 +257,29 @@ void load_lexer_from_file( } if (contains_delimiter) { - FileReader schema_reader; - ErrorCode error_code = schema_reader.try_open(schema_ast->m_file_path); - if (ErrorCode_Success != error_code) { - throw std::runtime_error( - schema_file_path + ":" + std::to_string(rule->m_line_num + 1) + ": error: '" - + rule->m_name + "' has regex pattern which contains delimiter '" - + char(delimiter_name) + "'.\n" - ); - } else { - // more detailed debugging based on looking at the file - string line; - for (uint32_t i = 0; i <= rule->m_line_num; i++) { - schema_reader.read_to_delimiter('\n', false, false, line); - } - int colon_pos = 0; - for (char i : line) { - colon_pos++; - if (i == ':') { - break; - } + FileReader schema_reader(schema_ast->m_file_path); + // more detailed debugging based on looking at the file + string line; + for (uint32_t i = 0; i <= rule->m_line_num; i++) { + schema_reader.read_to_delimiter('\n', false, false, line); + } + int colon_pos = 0; + for (char i : line) { + colon_pos++; + if (i == ':') { + break; } - string indent(10, ' '); - string spaces(colon_pos, ' '); - string arrows(line.size() - colon_pos, '^'); - - throw std::runtime_error( - schema_file_path + ":" + std::to_string(rule->m_line_num + 1) + ": error: '" - + rule->m_name + "' has regex pattern which contains delimiter '" - + char(delimiter_name) + "'.\n" + indent + line + "\n" + indent + spaces - + arrows + "\n" - ); } + string indent(10, ' '); + string spaces(colon_pos, ' '); + string arrows(line.size() - colon_pos, '^'); + + throw std::runtime_error( + schema_file_path + ":" + std::to_string(rule->m_line_num + 1) + ": error: '" + + rule->m_name + "' has regex pattern which contains delimiter '" + + char(delimiter_name) + "'.\n" + indent + line + "\n" + indent + spaces + + arrows + "\n" + ); } lexer.add_rule(lexer.m_symbol_id[rule->m_name], std::move(rule->m_regex_ptr)); } diff --git a/components/core/src/clp/clp/compression.cpp b/components/core/src/clp/clp/compression.cpp index 1741557bc..6a4a575dc 100644 --- a/components/core/src/clp/clp/compression.cpp +++ b/components/core/src/clp/clp/compression.cpp @@ -192,42 +192,43 @@ bool read_and_validate_grouped_file_list( string const& list_path, vector& grouped_files ) { - FileReader grouped_file_path_reader; - ErrorCode error_code = grouped_file_path_reader.try_open(list_path); - if (ErrorCode_Success != error_code) { - if (ErrorCode_FileNotFound == error_code) { - SPDLOG_ERROR("'{}' does not exist.", list_path.c_str()); - } else if (ErrorCode_errno == error_code) { - SPDLOG_ERROR("Failed to read '{}', errno={}", list_path.c_str(), errno); - } else { - SPDLOG_ERROR("Failed to read '{}', error_code={}", list_path.c_str(), error_code); - } - return false; - } + FileReader grouped_file_path_reader(list_path); +// ErrorCode error_code = grouped_file_path_reader.try_open(list_path); +// if (ErrorCode_Success != error_code) { +// if (ErrorCode_FileNotFound == error_code) { +// SPDLOG_ERROR("'{}' does not exist.", list_path.c_str()); +// } else if (ErrorCode_errno == error_code) { +// SPDLOG_ERROR("Failed to read '{}', errno={}", list_path.c_str(), errno); +// } else { +// SPDLOG_ERROR("Failed to read '{}', error_code={}", list_path.c_str(), error_code); +// } +// return false; +// } - FileReader grouped_file_id_reader; string grouped_file_ids_path = list_path.substr(0, list_path.length() - 4) + ".gid"; - error_code = grouped_file_id_reader.try_open(grouped_file_ids_path); - if (ErrorCode_Success != error_code) { - if (ErrorCode_FileNotFound == error_code) { - SPDLOG_ERROR("'{}' does not exist.", grouped_file_ids_path.c_str()); - } else if (ErrorCode_errno == error_code) { - SPDLOG_ERROR("Failed to read '{}', errno={}", grouped_file_ids_path.c_str(), errno); - } else { - SPDLOG_ERROR( - "Failed to read '{}', error_code={}", - grouped_file_ids_path.c_str(), - error_code - ); - } - return false; - } + FileReader grouped_file_id_reader(grouped_file_ids_path); +// error_code = grouped_file_id_reader.try_open(grouped_file_ids_path); +// if (ErrorCode_Success != error_code) { +// if (ErrorCode_FileNotFound == error_code) { +// SPDLOG_ERROR("'{}' does not exist.", grouped_file_ids_path.c_str()); +// } else if (ErrorCode_errno == error_code) { +// SPDLOG_ERROR("Failed to read '{}', errno={}", grouped_file_ids_path.c_str(), errno); +// } else { +// SPDLOG_ERROR( +// "Failed to read '{}', error_code={}", +// grouped_file_ids_path.c_str(), +// error_code +// ); +// } +// return false; +// } // Read list bool all_paths_valid = true; string path; string path_without_prefix; group_id_t group_id; + ErrorCode error_code; while (true) { // Read path error_code = grouped_file_path_reader.try_read_to_delimiter('\n', false, false, path); @@ -294,8 +295,6 @@ bool read_and_validate_grouped_file_list( return false; } - grouped_file_path_reader.close(); - grouped_file_id_reader.close(); // Validate the list contained at least one file if (grouped_files.empty()) { diff --git a/components/core/src/clp/clp/decompression.cpp b/components/core/src/clp/clp/decompression.cpp index 6b87f6777..5a57e6a2a 100644 --- a/components/core/src/clp/clp/decompression.cpp +++ b/components/core/src/clp/clp/decompression.cpp @@ -73,7 +73,6 @@ bool decompress( } archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); archive_reader.decompress_empty_directories(command_line_args.get_output_dir()); @@ -111,7 +110,6 @@ bool decompress( archive_ix->get_id(archive_id); auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); // Decompress all splits with the given path auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_path(file_path); @@ -145,7 +143,6 @@ bool decompress( archive_ix->get_id(archive_id); auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); // Decompress files auto file_metadata_ix_ptr = archive_reader.get_file_iterator(); @@ -270,7 +267,6 @@ bool decompress_to_ir(CommandLineArguments& command_line_args) { streaming_archive::reader::Archive archive_reader; auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_split_id(file_split_id); if (false == file_metadata_ix_ptr->has_next()) { diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp index 2fecd7e04..abd5f8257 100644 --- a/components/core/src/clp/dictionary_utils.cpp +++ b/components/core/src/clp/dictionary_utils.cpp @@ -2,21 +2,17 @@ namespace clp { void open_dictionary_for_reading( - std::string const& dictionary_path, - std::string const& segment_index_path, size_t decompressor_file_read_buffer_capacity, FileReader& dictionary_file_reader, streaming_compression::Decompressor& dictionary_decompressor, FileReader& segment_index_file_reader, streaming_compression::Decompressor& segment_index_decompressor ) { - dictionary_file_reader.open(dictionary_path); // Skip header dictionary_file_reader.seek_from_begin(sizeof(uint64_t)); // Open decompressor dictionary_decompressor.open(dictionary_file_reader, decompressor_file_read_buffer_capacity); - segment_index_file_reader.open(segment_index_path); // Skip header segment_index_file_reader.seek_from_begin(sizeof(uint64_t)); // Open decompressor diff --git a/components/core/src/clp/dictionary_utils.hpp b/components/core/src/clp/dictionary_utils.hpp index 42012964f..58b63e12c 100644 --- a/components/core/src/clp/dictionary_utils.hpp +++ b/components/core/src/clp/dictionary_utils.hpp @@ -8,8 +8,6 @@ namespace clp { void open_dictionary_for_reading( - std::string const& dictionary_path, - std::string const& segment_index_path, size_t decompressor_file_read_buffer_capacity, FileReader& dictionary_file_reader, streaming_compression::Decompressor& dictionary_decompressor, diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index 05e42cac3..ed7ee277a 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -37,11 +37,9 @@ void Archive::open(string const& path) { string metadata_file_path = path + '/' + cMetadataFileName; archive_format_version_t format_version{}; try { - FileReader file_reader; - file_reader.open(metadata_file_path); + FileReader file_reader(metadata_file_path); ArchiveMetadata const metadata{file_reader}; format_version = metadata.get_archive_format_version(); - file_reader.close(); } catch (TraceableException& traceable_exception) { auto error_code = traceable_exception.get_error_code(); if (ErrorCode_errno == error_code) { @@ -121,11 +119,6 @@ void Archive::close() { m_path.clear(); } -void Archive::refresh_dictionaries() { - m_logtype_dictionary.read_new_entries(); - m_var_dictionary.read_new_entries(); -} - ErrorCode Archive::open_file(File& file, MetadataDB::FileIterator const& file_metadata_ix) { return file.open_me(m_logtype_dictionary, file_metadata_ix, m_segment_manager); } diff --git a/components/core/src/clp/streaming_archive/reader/Archive.hpp b/components/core/src/clp/streaming_archive/reader/Archive.hpp index c724be476..e66561433 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.hpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.hpp @@ -48,7 +48,6 @@ class Archive { * Reads any new entries added to the dictionaries * @throw Same as LogTypeDictionary::read_from_file and VariableDictionary::read_from_file */ - void refresh_dictionaries(); LogTypeDictionaryReader const& get_logtype_dictionary() const; VariableDictionaryReader const& get_var_dictionary() const; From 21b447166ed4f3dcef3ef1d23f487597e0129bc1 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:07:11 -0400 Subject: [PATCH 02/62] Remove dictionary utils --- components/core/CMakeLists.txt | 2 - components/core/src/clp/DictionaryReader.hpp | 72 ++++++++----------- components/core/src/clp/DictionaryWriter.hpp | 24 +++---- .../core/src/clp/LogTypeDictionaryWriter.cpp | 2 - .../core/src/clp/VariableDictionaryWriter.cpp | 1 - components/core/src/clp/clg/CMakeLists.txt | 2 - components/core/src/clp/clo/CMakeLists.txt | 2 - components/core/src/clp/clp/CMakeLists.txt | 2 - components/core/src/clp/dictionary_utils.cpp | 43 ----------- components/core/src/clp/dictionary_utils.hpp | 23 ------ .../make_dictionaries_readable/CMakeLists.txt | 2 - 11 files changed, 38 insertions(+), 137 deletions(-) delete mode 100644 components/core/src/clp/dictionary_utils.cpp delete mode 100644 components/core/src/clp/dictionary_utils.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 57ae8cda1..38679a948 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -288,8 +288,6 @@ set(SOURCE_FILES_unitTest src/clp/database_utils.cpp src/clp/database_utils.hpp src/clp/Defs.h - src/clp/dictionary_utils.cpp - src/clp/dictionary_utils.hpp src/clp/DictionaryEntry.hpp src/clp/DictionaryReader.hpp src/clp/DictionaryWriter.hpp diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 5b87920fa..703803ec2 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -7,7 +7,6 @@ #include #include -#include "dictionary_utils.hpp" #include "DictionaryEntry.hpp" #include "FileReader.hpp" #include "streaming_compression/passthrough/Decompressor.hpp" @@ -104,10 +103,8 @@ class DictionaryReader { * Reads any new entries from disk */ void read_new_entries( - FileReader& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - FileReader& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor + std::string const& dictionary_path, + std::string const& segment_index_path ); // Variables @@ -125,37 +122,9 @@ void DictionaryReader::open( throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); } - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB - - FileReader dictionary_file_reader(dictionary_path); - FileReader segment_index_file_reader(segment_index_path); - -#if USE_PASSTHROUGH_COMPRESSION - streaming_compression::passthrough::Decompressor m_dictionary_decompressor; - streaming_compression::passthrough::Decompressor m_segment_index_decompressor; -#elif USE_ZSTD_COMPRESSION - streaming_compression::zstd::Decompressor dictionary_decompressor; - streaming_compression::zstd::Decompressor segment_index_decompressor; -#else - static_assert(false, "Unsupported compression mode."); -#endif - - open_dictionary_for_reading( - cDecompressorFileReadBufferCapacity, - dictionary_file_reader, - dictionary_decompressor, - segment_index_file_reader, - segment_index_decompressor - ); + read_new_entries(dictionary_path, segment_index_path); m_is_open = true; - - read_new_entries( - dictionary_file_reader, - dictionary_decompressor, - segment_index_file_reader, - segment_index_decompressor - ); } template @@ -172,17 +141,28 @@ void DictionaryReader::close() { template void DictionaryReader::read_new_entries( - FileReader& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - FileReader& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor + std::string const& dictionary_path, + std::string const& segment_index_path ) { - if (false == m_is_open) { - throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); - } + FileReader dictionary_file_reader(dictionary_path); + FileReader segment_index_file_reader(segment_index_path); + +constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB +#if USE_PASSTHROUGH_COMPRESSION + streaming_compression::passthrough::Decompressor m_dictionary_decompressor; + streaming_compression::passthrough::Decompressor m_segment_index_decompressor; +#elif USE_ZSTD_COMPRESSION + streaming_compression::zstd::Decompressor dictionary_decompressor; + streaming_compression::zstd::Decompressor segment_index_decompressor; +#else + static_assert(false, "Unsupported compression mode."); +#endif // Read dictionary header - auto num_dictionary_entries = read_dictionary_header(dictionary_file_reader); + uint64_t num_dictionary_entries {}; + if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)){ + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } // Validate dictionary header if (num_dictionary_entries < m_entries.size()) { @@ -193,7 +173,7 @@ void DictionaryReader::read_new_entries( if (num_dictionary_entries > m_entries.size()) { auto prev_num_dictionary_entries = m_entries.size(); m_entries.resize(num_dictionary_entries); - + dictionary_decompressor.open(dictionary_file_reader, cDecompressorFileReadBufferCapacity); for (size_t i = prev_num_dictionary_entries; i < num_dictionary_entries; ++i) { auto& entry = m_entries[i]; @@ -202,7 +182,10 @@ void DictionaryReader::read_new_entries( } // Read segment index header - auto num_segments = read_segment_index_header(segment_index_file_reader); + uint64_t num_segments {}; + if (false == segment_index_file_reader.read_numeric_value(num_segments, false)) { + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } // Validate segment index header if (num_segments < m_num_segments_read_from_index) { @@ -211,6 +194,7 @@ void DictionaryReader::read_new_entries( // Read new segments from index if (num_segments > m_num_segments_read_from_index) { + segment_index_decompressor.open(segment_index_file_reader, cDecompressorFileReadBufferCapacity); for (size_t i = m_num_segments_read_from_index; i < num_segments; ++i) { read_segment_ids(segment_index_decompressor); } diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 0d1d27d72..1883df548 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -8,7 +8,6 @@ #include "ArrayBackedPosIntSet.hpp" #include "Defs.h" -#include "dictionary_utils.hpp" #include "FileWriter.hpp" #include "spdlog_with_specializations.hpp" #include "streaming_compression/passthrough/Compressor.hpp" @@ -204,32 +203,30 @@ void DictionaryWriter::open_and_preload( m_max_id = max_id; FileReader dictionary_file_reader(dictionary_path); - FileReader segment_index_file_reader(segment_index_path); + + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION streaming_compression::passthrough::Decompressor dictionary_decompressor; - streaming_compression::passthrough::Decompressor segment_index_decompressor; #elif USE_ZSTD_COMPRESSION streaming_compression::zstd::Decompressor dictionary_decompressor; - streaming_compression::zstd::Decompressor segment_index_decompressor; #else static_assert(false, "Unsupported compression mode."); #endif - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB - open_dictionary_for_reading( - cDecompressorFileReadBufferCapacity, - dictionary_file_reader, - dictionary_decompressor, - segment_index_file_reader, - segment_index_decompressor - ); - auto num_dictionary_entries = read_dictionary_header(dictionary_file_reader); + // Read dictionary header + uint64_t num_dictionary_entries {}; + if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)){ + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + + // Validate dictionary header if (num_dictionary_entries > m_max_id) { SPDLOG_ERROR("DictionaryWriter ran out of IDs."); throw OperationFailed(ErrorCode_OutOfBounds, __FILENAME__, __LINE__); } // Loads entries from the given dictionary file EntryType entry; + dictionary_decompressor.open(dictionary_file_reader, cDecompressorFileReadBufferCapacity); for (size_t i = 0; i < num_dictionary_entries; ++i) { entry.clear(); entry.read_from_file(dictionary_decompressor); @@ -246,7 +243,6 @@ void DictionaryWriter::open_and_preload( m_next_id = num_dictionary_entries; - segment_index_decompressor.close(); dictionary_decompressor.close(); m_dictionary_file_writer.open( diff --git a/components/core/src/clp/LogTypeDictionaryWriter.cpp b/components/core/src/clp/LogTypeDictionaryWriter.cpp index 4420b2789..e7170746b 100644 --- a/components/core/src/clp/LogTypeDictionaryWriter.cpp +++ b/components/core/src/clp/LogTypeDictionaryWriter.cpp @@ -1,7 +1,5 @@ #include "LogTypeDictionaryWriter.hpp" -#include "dictionary_utils.hpp" - using std::string; namespace clp { diff --git a/components/core/src/clp/VariableDictionaryWriter.cpp b/components/core/src/clp/VariableDictionaryWriter.cpp index 77b063503..119ca0daf 100644 --- a/components/core/src/clp/VariableDictionaryWriter.cpp +++ b/components/core/src/clp/VariableDictionaryWriter.cpp @@ -1,6 +1,5 @@ #include "VariableDictionaryWriter.hpp" -#include "dictionary_utils.hpp" #include "spdlog_with_specializations.hpp" namespace clp { diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index bed6c11fc..6ec2b49e9 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -5,8 +5,6 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h - ../dictionary_utils.cpp - ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../EncodedVariableInterpreter.cpp diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index 1eea2b5bb..50d19e638 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -9,8 +9,6 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h - ../dictionary_utils.cpp - ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../EncodedVariableInterpreter.cpp diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index 0f18777d9..2c722d39c 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -8,8 +8,6 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h - ../dictionary_utils.cpp - ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../DictionaryWriter.hpp diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp deleted file mode 100644 index abd5f8257..000000000 --- a/components/core/src/clp/dictionary_utils.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "dictionary_utils.hpp" - -namespace clp { -void open_dictionary_for_reading( - size_t decompressor_file_read_buffer_capacity, - FileReader& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - FileReader& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor -) { - // Skip header - dictionary_file_reader.seek_from_begin(sizeof(uint64_t)); - // Open decompressor - dictionary_decompressor.open(dictionary_file_reader, decompressor_file_read_buffer_capacity); - - // Skip header - segment_index_file_reader.seek_from_begin(sizeof(uint64_t)); - // Open decompressor - segment_index_decompressor.open( - segment_index_file_reader, - decompressor_file_read_buffer_capacity - ); -} - -uint64_t read_dictionary_header(FileReader& file_reader) { - auto dictionary_file_reader_pos = file_reader.get_pos(); - file_reader.seek_from_begin(0); - uint64_t num_dictionary_entries; - file_reader.read_numeric_value(num_dictionary_entries, false); - file_reader.seek_from_begin(dictionary_file_reader_pos); - return num_dictionary_entries; -} - -uint64_t read_segment_index_header(FileReader& file_reader) { - // Read segment index header - auto segment_index_file_reader_pos = file_reader.get_pos(); - file_reader.seek_from_begin(0); - uint64_t num_segments; - file_reader.read_numeric_value(num_segments, false); - file_reader.seek_from_begin(segment_index_file_reader_pos); - return num_segments; -} -} // namespace clp diff --git a/components/core/src/clp/dictionary_utils.hpp b/components/core/src/clp/dictionary_utils.hpp deleted file mode 100644 index 58b63e12c..000000000 --- a/components/core/src/clp/dictionary_utils.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef CLP_DICTIONARY_UTILS_HPP -#define CLP_DICTIONARY_UTILS_HPP - -#include - -#include "FileReader.hpp" -#include "streaming_compression/Decompressor.hpp" - -namespace clp { -void open_dictionary_for_reading( - size_t decompressor_file_read_buffer_capacity, - FileReader& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - FileReader& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor -); - -uint64_t read_dictionary_header(FileReader& file_reader); - -uint64_t read_segment_index_header(FileReader& file_reader); -} // namespace clp - -#endif // CLP_DICTIONARY_UTILS_HPP diff --git a/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt b/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt index fd62a39fb..139b5e363 100644 --- a/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt +++ b/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt @@ -1,7 +1,5 @@ set( MAKE_DICTIONARIES_READABLE_SOURCES - ../dictionary_utils.cpp - ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../FileDescriptor.cpp From 3a3c9baa55216f017b3c10a465306ffd8a0baa45 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:47:59 -0400 Subject: [PATCH 03/62] fix exception issues and make sure unit tests and clg can build --- components/core/src/clp/Utils.cpp | 14 +++- components/core/src/clp/clg/clg.cpp | 32 +-------- components/core/src/clp/clp/compression.cpp | 65 ++++++++++--------- .../core/tests/test-BufferedFileReader.cpp | 4 +- .../tests/test-EncodedVariableInterpreter.cpp | 1 - .../core/tests/test-MemoryMappedFile.cpp | 4 +- components/core/tests/test-NetworkReader.cpp | 8 +-- .../core/tests/test-ParserWithUserSchema.cpp | 6 +- 8 files changed, 53 insertions(+), 81 deletions(-) diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp index 866ae840f..f35d9dc07 100644 --- a/components/core/src/clp/Utils.cpp +++ b/components/core/src/clp/Utils.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -19,6 +20,8 @@ using std::list; using std::string; using std::vector; +using std::unique_ptr; +using std::make_unique; namespace clp { ErrorCode create_directory(string const& path, mode_t mode, bool exist_ok) { @@ -137,13 +140,18 @@ string get_unambiguous_path(string const& path) { } ErrorCode read_list_of_paths(string const& list_path, vector& paths) { - FileReader file_reader(list_path); + unique_ptr file_reader; + try { + file_reader = make_unique(list_path); + } catch (FileReader::OperationFailed const& err) { + return err.get_error_code(); + } // Read file string line; - ErrorCode error_code; + ErrorCode error_code{}; while (true) { - error_code = file_reader.try_read_to_delimiter('\n', false, false, line); + error_code = file_reader->try_read_to_delimiter('\n', false, false, line); if (ErrorCode_Success != error_code) { break; } diff --git a/components/core/src/clp/clg/clg.cpp b/components/core/src/clp/clg/clg.cpp index 4580358b7..68be928aa 100644 --- a/components/core/src/clp/clg/clg.cpp +++ b/components/core/src/clp/clg/clg.cpp @@ -173,31 +173,6 @@ static bool open_archive(string const& archive_path, Archive& archive_reader) { } } - try { - archive_reader.refresh_dictionaries(); - } catch (TraceableException& e) { - error_code = e.get_error_code(); - if (ErrorCode_errno == error_code) { - SPDLOG_ERROR( - "Reading dictionaries failed: {}:{} {}, errno={}", - e.get_filename(), - e.get_line_number(), - e.what(), - errno - ); - return false; - } else { - SPDLOG_ERROR( - "Reading dictionaries failed: {}:{} {}, error_code={}", - e.get_filename(), - e.get_line_number(), - e.what(), - error_code - ); - return false; - } - } - return true; } @@ -494,15 +469,13 @@ int main(int argc, char const* argv[]) { if (command_line_args.get_search_strings_file_path().empty()) { search_strings.push_back(command_line_args.get_search_string()); } else { - FileReader file_reader; - file_reader.open(command_line_args.get_search_strings_file_path()); + FileReader file_reader(command_line_args.get_search_strings_file_path()); string line; while (file_reader.read_to_delimiter('\n', false, false, line)) { if (!line.empty()) { search_strings.push_back(line); } } - file_reader.close(); } // Validate archives directory @@ -589,8 +562,7 @@ int main(int argc, char const* argv[]) { use_heuristic = false; char buf[max_map_schema_length]; - FileReader file_reader; - file_reader.try_open(schema_file_path); + FileReader file_reader(schema_file_path); size_t num_bytes_read; file_reader.read(buf, max_map_schema_length, num_bytes_read); diff --git a/components/core/src/clp/clp/compression.cpp b/components/core/src/clp/clp/compression.cpp index 6a4a575dc..97f33ea42 100644 --- a/components/core/src/clp/clp/compression.cpp +++ b/components/core/src/clp/clp/compression.cpp @@ -1,6 +1,7 @@ #include "compression.hpp" #include +#include #include #include @@ -22,6 +23,8 @@ using std::endl; using std::out_of_range; using std::string; using std::vector; +using std::unique_ptr; +using std::make_unique; namespace clp::clp { // Local prototypes @@ -192,46 +195,46 @@ bool read_and_validate_grouped_file_list( string const& list_path, vector& grouped_files ) { - FileReader grouped_file_path_reader(list_path); -// ErrorCode error_code = grouped_file_path_reader.try_open(list_path); -// if (ErrorCode_Success != error_code) { -// if (ErrorCode_FileNotFound == error_code) { -// SPDLOG_ERROR("'{}' does not exist.", list_path.c_str()); -// } else if (ErrorCode_errno == error_code) { -// SPDLOG_ERROR("Failed to read '{}', errno={}", list_path.c_str(), errno); -// } else { -// SPDLOG_ERROR("Failed to read '{}', error_code={}", list_path.c_str(), error_code); -// } -// return false; -// } + unique_ptr grouped_file_path_reader; + try { + grouped_file_path_reader = make_unique(list_path); + } catch (FileReader::OperationFailed const& err) { + auto const error_code = err.get_error_code(); + if (ErrorCode_FileNotFound == error_code) { + SPDLOG_ERROR("'{}' does not exist.", list_path.c_str()); + } else if (ErrorCode_errno == error_code) { + SPDLOG_ERROR("Failed to read '{}', errno={}", list_path.c_str(), errno); + } else { + SPDLOG_ERROR("Failed to read '{}', error_code={}", list_path.c_str(), error_code); + } + return false; + } string grouped_file_ids_path = list_path.substr(0, list_path.length() - 4) + ".gid"; - FileReader grouped_file_id_reader(grouped_file_ids_path); -// error_code = grouped_file_id_reader.try_open(grouped_file_ids_path); -// if (ErrorCode_Success != error_code) { -// if (ErrorCode_FileNotFound == error_code) { -// SPDLOG_ERROR("'{}' does not exist.", grouped_file_ids_path.c_str()); -// } else if (ErrorCode_errno == error_code) { -// SPDLOG_ERROR("Failed to read '{}', errno={}", grouped_file_ids_path.c_str(), errno); -// } else { -// SPDLOG_ERROR( -// "Failed to read '{}', error_code={}", -// grouped_file_ids_path.c_str(), -// error_code -// ); -// } -// return false; -// } + unique_ptr grouped_file_id_reader; + try { + grouped_file_id_reader = make_unique(grouped_file_ids_path); + } catch (FileReader::OperationFailed const& err) { + auto const error_code = err.get_error_code(); + if (ErrorCode_FileNotFound == error_code) { + SPDLOG_ERROR("'{}' does not exist.", grouped_file_ids_path.c_str()); + } else if (ErrorCode_errno == error_code) { + SPDLOG_ERROR("Failed to read '{}', errno={}", grouped_file_ids_path.c_str(), errno); + } else { + SPDLOG_ERROR("Failed to read '{}', error_code={}", grouped_file_ids_path.c_str(), error_code); + } + return false; + } // Read list bool all_paths_valid = true; string path; string path_without_prefix; group_id_t group_id; - ErrorCode error_code; + ErrorCode error_code{}; while (true) { // Read path - error_code = grouped_file_path_reader.try_read_to_delimiter('\n', false, false, path); + error_code = grouped_file_path_reader->try_read_to_delimiter('\n', false, false, path); if (ErrorCode_Success != error_code) { break; } @@ -243,7 +246,7 @@ bool read_and_validate_grouped_file_list( } // Read group ID - error_code = grouped_file_id_reader.try_read_numeric_value(group_id); + error_code = grouped_file_id_reader->try_read_numeric_value(group_id); if (ErrorCode_Success != error_code) { if (ErrorCode_EndOfFile == error_code) { SPDLOG_ERROR("There are more grouped file paths than IDs."); diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 734eca87b..c5e969745 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -277,8 +277,7 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_reader.open(test_file_path); std::string test_string; - clp::FileReader ref_file_reader; - ref_file_reader.open(test_file_path); + clp::FileReader ref_file_reader(test_file_path); std::string ref_string; // Validate that a FileReader and a BufferedFileReader return the same strings (split by @@ -292,7 +291,6 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { REQUIRE(test_string == ref_string); } - ref_file_reader.close(); file_reader.close(); boost::filesystem::remove(test_file_path); } diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 6ab0687a9..5600b2ccf 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -439,7 +439,6 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { // Open reader clp::VariableDictionaryReader var_dict_reader; var_dict_reader.open(cVarDictPath, cVarSegmentIndexPath); - var_dict_reader.read_new_entries(); // Test searching string search_logtype = "here is a string with a small int "; diff --git a/components/core/tests/test-MemoryMappedFile.cpp b/components/core/tests/test-MemoryMappedFile.cpp index 20d433f32..cd4ee9aa3 100644 --- a/components/core/tests/test-MemoryMappedFile.cpp +++ b/components/core/tests/test-MemoryMappedFile.cpp @@ -42,10 +42,8 @@ TEST_CASE("memory_mapped_file_view_basic", "[ReadOnlyMemoryMappedFile]") { auto const test_input_path{ get_test_dir() / std::filesystem::path{"test_network_reader_src"} / "random.log" }; - clp::FileReader file_reader; - file_reader.open(test_input_path.string()); + clp::FileReader file_reader(test_input_path.string()); auto const expected{read_content(file_reader)}; - file_reader.close(); clp::ReadOnlyMemoryMappedFile const mmap_file{test_input_path.string()}; auto const view{mmap_file.get_view()}; diff --git a/components/core/tests/test-NetworkReader.cpp b/components/core/tests/test-NetworkReader.cpp index 958e31d8b..fcde4a11a 100644 --- a/components/core/tests/test-NetworkReader.cpp +++ b/components/core/tests/test-NetworkReader.cpp @@ -66,10 +66,8 @@ auto get_content(clp::ReaderInterface& reader, size_t read_buf_size) -> std::vec } // namespace TEST_CASE("network_reader_basic", "[NetworkReader]") { - clp::FileReader ref_reader; - ref_reader.open(get_test_input_local_path()); + clp::FileReader ref_reader{get_test_input_local_path()}; auto const expected{get_content(ref_reader)}; - ref_reader.close(); REQUIRE((clp::ErrorCode_Success == clp::NetworkReader::init())); clp::NetworkReader reader{get_test_input_remote_url()}; @@ -84,12 +82,10 @@ TEST_CASE("network_reader_basic", "[NetworkReader]") { TEST_CASE("network_reader_with_offset_and_seek", "[NetworkReader]") { constexpr size_t cOffset{319}; - clp::FileReader ref_reader; - ref_reader.open(get_test_input_local_path()); + clp::FileReader ref_reader{get_test_input_local_path()}; ref_reader.seek_from_begin(cOffset); auto const expected{get_content(ref_reader)}; auto const ref_end_pos{ref_reader.get_pos()}; - ref_reader.close(); REQUIRE((clp::ErrorCode_Success == clp::NetworkReader::init())); diff --git a/components/core/tests/test-ParserWithUserSchema.cpp b/components/core/tests/test-ParserWithUserSchema.cpp index d69a94958..3689c69e8 100644 --- a/components/core/tests/test-ParserWithUserSchema.cpp +++ b/components/core/tests/test-ParserWithUserSchema.cpp @@ -162,9 +162,8 @@ TEST_CASE("Test forward lexer", "[Search]") { std::string schema_file_name = "../tests/test_schema_files/search_schema.txt"; std::string schema_file_path = boost::filesystem::weakly_canonical(schema_file_name).string(); load_lexer_from_file(schema_file_path, false, forward_lexer); - FileReader file_reader; + FileReader file_reader{"../tests/test_search_queries/easy.txt"}; LogSurgeonReader reader_wrapper(file_reader); - file_reader.open("../tests/test_search_queries/easy.txt"); log_surgeon::ParserInputBuffer parser_input_buffer; parser_input_buffer.read_if_safe(reader_wrapper); forward_lexer.reset(); @@ -187,9 +186,8 @@ TEST_CASE("Test reverse lexer", "[Search]") { std::string schema_file_name = "../tests/test_schema_files/search_schema.txt"; std::string schema_file_path = boost::filesystem::weakly_canonical(schema_file_name).string(); load_lexer_from_file(schema_file_path, false, reverse_lexer); - FileReader file_reader; + FileReader file_reader{"../tests/test_search_queries/easy.txt"}; LogSurgeonReader reader_wrapper(file_reader); - file_reader.open("../tests/test_search_queries/easy.txt"); log_surgeon::ParserInputBuffer parser_input_buffer; parser_input_buffer.read_if_safe(reader_wrapper); reverse_lexer.reset(); From 2c40e29947f0cf0114f8025c83f8c52439c2f4bf Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:48:45 -0400 Subject: [PATCH 04/62] Linter --- components/core/src/clp/DictionaryReader.hpp | 19 ++++++++++--------- components/core/src/clp/DictionaryWriter.hpp | 4 ++-- components/core/src/clp/FileReader.cpp | 6 +++++- components/core/src/clp/Utils.cpp | 4 ++-- components/core/src/clp/clp/compression.cpp | 11 +++++++---- 5 files changed, 26 insertions(+), 18 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 703803ec2..38a525331 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -102,10 +102,8 @@ class DictionaryReader { /** * Reads any new entries from disk */ - void read_new_entries( - std::string const& dictionary_path, - std::string const& segment_index_path - ); + void + read_new_entries(std::string const& dictionary_path, std::string const& segment_index_path); // Variables bool m_is_open; @@ -147,7 +145,7 @@ void DictionaryReader::read_new_entries( FileReader dictionary_file_reader(dictionary_path); FileReader segment_index_file_reader(segment_index_path); -constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION streaming_compression::passthrough::Decompressor m_dictionary_decompressor; streaming_compression::passthrough::Decompressor m_segment_index_decompressor; @@ -159,8 +157,8 @@ constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #endif // Read dictionary header - uint64_t num_dictionary_entries {}; - if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)){ + uint64_t num_dictionary_entries{}; + if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)) { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } @@ -182,7 +180,7 @@ constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB } // Read segment index header - uint64_t num_segments {}; + uint64_t num_segments{}; if (false == segment_index_file_reader.read_numeric_value(num_segments, false)) { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } @@ -194,7 +192,10 @@ constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB // Read new segments from index if (num_segments > m_num_segments_read_from_index) { - segment_index_decompressor.open(segment_index_file_reader, cDecompressorFileReadBufferCapacity); + segment_index_decompressor.open( + segment_index_file_reader, + cDecompressorFileReadBufferCapacity + ); for (size_t i = m_num_segments_read_from_index; i < num_segments; ++i) { read_segment_ids(segment_index_decompressor); } diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 1883df548..900029520 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -214,8 +214,8 @@ void DictionaryWriter::open_and_preload( #endif // Read dictionary header - uint64_t num_dictionary_entries {}; - if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)){ + uint64_t num_dictionary_entries{}; + if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)) { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp/FileReader.cpp b/components/core/src/clp/FileReader.cpp index a91ef5e77..89d50a7f4 100644 --- a/components/core/src/clp/FileReader.cpp +++ b/components/core/src/clp/FileReader.cpp @@ -12,7 +12,10 @@ using std::string; namespace clp { -FileReader::FileReader(string const& path) : m_file{nullptr}, m_getdelim_buf_len(0), m_getdelim_buf(nullptr) { +FileReader::FileReader(string const& path) + : m_file{nullptr}, + m_getdelim_buf_len(0), + m_getdelim_buf(nullptr) { m_file = fopen(path.c_str(), "rb"); if (nullptr == m_file) { if (ENOENT == errno) { @@ -22,6 +25,7 @@ FileReader::FileReader(string const& path) : m_file{nullptr}, m_getdelim_buf_len } m_path = path; } + FileReader::~FileReader() { // NOTE: We don't check errors for fclose since it seems the only reason it could fail is if // it was interrupted by a signal diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp index f35d9dc07..393b49230 100644 --- a/components/core/src/clp/Utils.cpp +++ b/components/core/src/clp/Utils.cpp @@ -18,10 +18,10 @@ #include "spdlog_with_specializations.hpp" using std::list; +using std::make_unique; using std::string; -using std::vector; using std::unique_ptr; -using std::make_unique; +using std::vector; namespace clp { ErrorCode create_directory(string const& path, mode_t mode, bool exist_ok) { diff --git a/components/core/src/clp/clp/compression.cpp b/components/core/src/clp/clp/compression.cpp index 97f33ea42..03b74458f 100644 --- a/components/core/src/clp/clp/compression.cpp +++ b/components/core/src/clp/clp/compression.cpp @@ -20,11 +20,11 @@ using clp::streaming_archive::writer::split_archive; using std::cerr; using std::cout; using std::endl; +using std::make_unique; using std::out_of_range; using std::string; -using std::vector; using std::unique_ptr; -using std::make_unique; +using std::vector; namespace clp::clp { // Local prototypes @@ -221,7 +221,11 @@ bool read_and_validate_grouped_file_list( } else if (ErrorCode_errno == error_code) { SPDLOG_ERROR("Failed to read '{}', errno={}", grouped_file_ids_path.c_str(), errno); } else { - SPDLOG_ERROR("Failed to read '{}', error_code={}", grouped_file_ids_path.c_str(), error_code); + SPDLOG_ERROR( + "Failed to read '{}', error_code={}", + grouped_file_ids_path.c_str(), + error_code + ); } return false; } @@ -298,7 +302,6 @@ bool read_and_validate_grouped_file_list( return false; } - // Validate the list contained at least one file if (grouped_files.empty()) { SPDLOG_ERROR("'{}' did not contain any paths.", list_path.c_str()); From d5034369c99d19cd60e7edc2a20fa6b8cb3448eb Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:58:25 -0400 Subject: [PATCH 05/62] Fixes --- components/core/src/clp/DictionaryReader.hpp | 10 +++++----- components/core/src/clp/DictionaryWriter.hpp | 2 +- components/core/src/clp/FileReader.cpp | 3 +-- components/core/src/clp/Utils.cpp | 4 ++-- components/core/src/clp/clp/compression.cpp | 4 ++-- .../core/src/clp/streaming_archive/reader/Archive.cpp | 2 +- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 38a525331..d7b47aa4e 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -142,8 +142,8 @@ void DictionaryReader::read_new_entries( std::string const& dictionary_path, std::string const& segment_index_path ) { - FileReader dictionary_file_reader(dictionary_path); - FileReader segment_index_file_reader(segment_index_path); + FileReader dictionary_file_reader{dictionary_path}; + FileReader segment_index_file_reader{segment_index_path}; constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION @@ -269,13 +269,13 @@ template void DictionaryReader::read_segment_ids( streaming_compression::Decompressor& segment_index_decompressor ) { - segment_id_t segment_id; + segment_id_t segment_id{}; segment_index_decompressor.read_numeric_value(segment_id, false); - uint64_t num_ids; + uint64_t num_ids{}; segment_index_decompressor.read_numeric_value(num_ids, false); for (uint64_t i = 0; i < num_ids; ++i) { - DictionaryIdType id; + DictionaryIdType id{}; segment_index_decompressor.read_numeric_value(id, false); if (id >= m_entries.size()) { throw OperationFailed(ErrorCode_Corrupt, __FILENAME__, __LINE__); diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 900029520..19048f089 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -202,7 +202,7 @@ void DictionaryWriter::open_and_preload( m_max_id = max_id; - FileReader dictionary_file_reader(dictionary_path); + FileReader dictionary_file_reader{dictionary_path}; constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION diff --git a/components/core/src/clp/FileReader.cpp b/components/core/src/clp/FileReader.cpp index 89d50a7f4..2182c61fa 100644 --- a/components/core/src/clp/FileReader.cpp +++ b/components/core/src/clp/FileReader.cpp @@ -13,10 +13,9 @@ using std::string; namespace clp { FileReader::FileReader(string const& path) - : m_file{nullptr}, + : m_file{fopen(path.c_str(), "rb")}, m_getdelim_buf_len(0), m_getdelim_buf(nullptr) { - m_file = fopen(path.c_str(), "rb"); if (nullptr == m_file) { if (ENOENT == errno) { throw OperationFailed(ErrorCode_FileNotFound, __FILE__, __LINE__); diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp index 393b49230..f487a3880 100644 --- a/components/core/src/clp/Utils.cpp +++ b/components/core/src/clp/Utils.cpp @@ -149,7 +149,7 @@ ErrorCode read_list_of_paths(string const& list_path, vector& paths) { // Read file string line; - ErrorCode error_code{}; + ErrorCode error_code{ErrorCode_Success}; while (true) { error_code = file_reader->try_read_to_delimiter('\n', false, false, line); if (ErrorCode_Success != error_code) { @@ -265,7 +265,7 @@ void load_lexer_from_file( } if (contains_delimiter) { - FileReader schema_reader(schema_ast->m_file_path); + FileReader schema_reader{schema_ast->m_file_path}; // more detailed debugging based on looking at the file string line; for (uint32_t i = 0; i <= rule->m_line_num; i++) { diff --git a/components/core/src/clp/clp/compression.cpp b/components/core/src/clp/clp/compression.cpp index 03b74458f..c27e688e0 100644 --- a/components/core/src/clp/clp/compression.cpp +++ b/components/core/src/clp/clp/compression.cpp @@ -210,8 +210,8 @@ bool read_and_validate_grouped_file_list( return false; } - string grouped_file_ids_path = list_path.substr(0, list_path.length() - 4) + ".gid"; unique_ptr grouped_file_id_reader; + string grouped_file_ids_path = list_path.substr(0, list_path.length() - 4) + ".gid"; try { grouped_file_id_reader = make_unique(grouped_file_ids_path); } catch (FileReader::OperationFailed const& err) { @@ -235,7 +235,7 @@ bool read_and_validate_grouped_file_list( string path; string path_without_prefix; group_id_t group_id; - ErrorCode error_code{}; + ErrorCode error_code{ErrorCode_Success}; while (true) { // Read path error_code = grouped_file_path_reader->try_read_to_delimiter('\n', false, false, path); diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index ed7ee277a..541b25595 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -37,7 +37,7 @@ void Archive::open(string const& path) { string metadata_file_path = path + '/' + cMetadataFileName; archive_format_version_t format_version{}; try { - FileReader file_reader(metadata_file_path); + FileReader file_reader{metadata_file_path}; ArchiveMetadata const metadata{file_reader}; format_version = metadata.get_archive_format_version(); } catch (TraceableException& traceable_exception) { From 1ff5ea8bf13ee3414b99c139e52b3b0b2a4dfeab Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:05:27 -0400 Subject: [PATCH 06/62] Rearrange code --- components/core/src/clp/DictionaryReader.hpp | 2 +- components/core/src/clp/DictionaryWriter.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index d7b47aa4e..2f50e71d5 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -145,7 +145,6 @@ void DictionaryReader::read_new_entries( FileReader dictionary_file_reader{dictionary_path}; FileReader segment_index_file_reader{segment_index_path}; - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION streaming_compression::passthrough::Decompressor m_dictionary_decompressor; streaming_compression::passthrough::Decompressor m_segment_index_decompressor; @@ -155,6 +154,7 @@ void DictionaryReader::read_new_entries( #else static_assert(false, "Unsupported compression mode."); #endif + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB // Read dictionary header uint64_t num_dictionary_entries{}; diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 19048f089..0e1d9ca1c 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -204,7 +204,6 @@ void DictionaryWriter::open_and_preload( FileReader dictionary_file_reader{dictionary_path}; - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB #if USE_PASSTHROUGH_COMPRESSION streaming_compression::passthrough::Decompressor dictionary_decompressor; #elif USE_ZSTD_COMPRESSION @@ -212,6 +211,7 @@ void DictionaryWriter::open_and_preload( #else static_assert(false, "Unsupported compression mode."); #endif + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB // Read dictionary header uint64_t num_dictionary_entries{}; From 8a7ddc765c47bb1f3408ed7b337d0907f839f716 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:08:40 -0400 Subject: [PATCH 07/62] fix clo build --- components/core/src/clp/clo/clo.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/core/src/clp/clo/clo.cpp b/components/core/src/clp/clo/clo.cpp index f29df0306..5ddab50ea 100644 --- a/components/core/src/clp/clo/clo.cpp +++ b/components/core/src/clp/clo/clo.cpp @@ -140,7 +140,6 @@ bool extract_ir(CommandLineArguments const& command_line_args) { Archive archive_reader; archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); auto const& file_split_id = command_line_args.get_file_split_id(); auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_split_id(file_split_id); @@ -482,7 +481,6 @@ static bool search_archive( Archive archive_reader; archive_reader.open(archive_path.string()); - archive_reader.refresh_dictionaries(); auto search_begin_ts = command_line_args.get_search_begin_ts(); auto search_end_ts = command_line_args.get_search_end_ts(); From 70325a94b25ff5e04f203cd84f9abc0708c9774a Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:20:26 -0400 Subject: [PATCH 08/62] fix --- components/core/src/clp/clg/clg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/clg/clg.cpp b/components/core/src/clp/clg/clg.cpp index 68be928aa..9bb787a4c 100644 --- a/components/core/src/clp/clg/clg.cpp +++ b/components/core/src/clp/clg/clg.cpp @@ -469,7 +469,7 @@ int main(int argc, char const* argv[]) { if (command_line_args.get_search_strings_file_path().empty()) { search_strings.push_back(command_line_args.get_search_string()); } else { - FileReader file_reader(command_line_args.get_search_strings_file_path()); + FileReader file_reader{command_line_args.get_search_strings_file_path()}; string line; while (file_reader.read_to_delimiter('\n', false, false, line)) { if (!line.empty()) { @@ -562,7 +562,7 @@ int main(int argc, char const* argv[]) { use_heuristic = false; char buf[max_map_schema_length]; - FileReader file_reader(schema_file_path); + FileReader file_reader{schema_file_path}; size_t num_bytes_read; file_reader.read(buf, max_map_schema_length, num_bytes_read); From 99d45a61dba28984accd11d70405a39d32071fe3 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:33:22 -0400 Subject: [PATCH 09/62] Fix make-dictionaries-readable --- .../make_dictionaries_readable/make-dictionaries-readable.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp index f35932fc3..f40a0b110 100644 --- a/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -55,7 +55,6 @@ int main(int argc, char const* argv[]) { / clp::streaming_archive::cLogTypeSegmentIndexFilename; clp::LogTypeDictionaryReader logtype_dict; logtype_dict.open(logtype_dict_path.string(), logtype_segment_index_path.string()); - logtype_dict.read_new_entries(); // Write readable dictionary auto readable_logtype_dict_path = boost::filesystem::path(command_line_args.get_output_dir()) @@ -139,7 +138,6 @@ int main(int argc, char const* argv[]) { / clp::streaming_archive::cVarSegmentIndexFilename; clp::VariableDictionaryReader var_dict; var_dict.open(var_dict_path.string(), var_segment_index_path.string()); - var_dict.read_new_entries(); // Write readable dictionary auto readable_var_dict_path = boost::filesystem::path(command_line_args.get_output_dir()) From b086953d4232614fdca2d0e06aecbe8845b8eb86 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:35:36 -0400 Subject: [PATCH 10/62] fix constructors --- components/core/tests/test-BufferedFileReader.cpp | 2 +- components/core/tests/test-MemoryMappedFile.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index c5e969745..f182a777c 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -277,7 +277,7 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_reader.open(test_file_path); std::string test_string; - clp::FileReader ref_file_reader(test_file_path); + clp::FileReader ref_file_reader{test_file_path}; std::string ref_string; // Validate that a FileReader and a BufferedFileReader return the same strings (split by diff --git a/components/core/tests/test-MemoryMappedFile.cpp b/components/core/tests/test-MemoryMappedFile.cpp index cd4ee9aa3..9cb855e0b 100644 --- a/components/core/tests/test-MemoryMappedFile.cpp +++ b/components/core/tests/test-MemoryMappedFile.cpp @@ -42,7 +42,7 @@ TEST_CASE("memory_mapped_file_view_basic", "[ReadOnlyMemoryMappedFile]") { auto const test_input_path{ get_test_dir() / std::filesystem::path{"test_network_reader_src"} / "random.log" }; - clp::FileReader file_reader(test_input_path.string()); + clp::FileReader file_reader{test_input_path.string()}; auto const expected{read_content(file_reader)}; clp::ReadOnlyMemoryMappedFile const mmap_file{test_input_path.string()}; From 746834f30960748e5ee2e2ea6f23ef0184ea2af8 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:42:48 -0400 Subject: [PATCH 11/62] refactor --- components/core/src/clp/DictionaryReader.hpp | 30 +++++++++++++------- components/core/src/clp/DictionaryWriter.hpp | 3 +- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 2f50e71d5..7627c2cf9 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -142,20 +142,11 @@ void DictionaryReader::read_new_entries( std::string const& dictionary_path, std::string const& segment_index_path ) { + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB + FileReader dictionary_file_reader{dictionary_path}; FileReader segment_index_file_reader{segment_index_path}; -#if USE_PASSTHROUGH_COMPRESSION - streaming_compression::passthrough::Decompressor m_dictionary_decompressor; - streaming_compression::passthrough::Decompressor m_segment_index_decompressor; -#elif USE_ZSTD_COMPRESSION - streaming_compression::zstd::Decompressor dictionary_decompressor; - streaming_compression::zstd::Decompressor segment_index_decompressor; -#else - static_assert(false, "Unsupported compression mode."); -#endif - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB - // Read dictionary header uint64_t num_dictionary_entries{}; if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)) { @@ -171,12 +162,21 @@ void DictionaryReader::read_new_entries( if (num_dictionary_entries > m_entries.size()) { auto prev_num_dictionary_entries = m_entries.size(); m_entries.resize(num_dictionary_entries); + +#if USE_PASSTHROUGH_COMPRESSION + streaming_compression::passthrough::Decompressor m_dictionary_decompressor; +#elif USE_ZSTD_COMPRESSION + streaming_compression::zstd::Decompressor dictionary_decompressor; +#else + static_assert(false, "Unsupported compression mode."); +#endif dictionary_decompressor.open(dictionary_file_reader, cDecompressorFileReadBufferCapacity); for (size_t i = prev_num_dictionary_entries; i < num_dictionary_entries; ++i) { auto& entry = m_entries[i]; entry.read_from_file(dictionary_decompressor); } + dictionary_decompressor.close(); } // Read segment index header @@ -192,6 +192,13 @@ void DictionaryReader::read_new_entries( // Read new segments from index if (num_segments > m_num_segments_read_from_index) { +#if USE_PASSTHROUGH_COMPRESSION + streaming_compression::passthrough::Decompressor m_segment_index_decompressor; +#elif USE_ZSTD_COMPRESSION + streaming_compression::zstd::Decompressor segment_index_decompressor; +#else + static_assert(false, "Unsupported compression mode."); +#endif segment_index_decompressor.open( segment_index_file_reader, cDecompressorFileReadBufferCapacity @@ -199,6 +206,7 @@ void DictionaryReader::read_new_entries( for (size_t i = m_num_segments_read_from_index; i < num_segments; ++i) { read_segment_ids(segment_index_decompressor); } + segment_index_decompressor.close(); } } diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 0e1d9ca1c..b1f25a386 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -240,11 +240,10 @@ void DictionaryWriter::open_and_preload( ; m_data_size += entry.get_data_size(); } + dictionary_decompressor.close(); m_next_id = num_dictionary_entries; - dictionary_decompressor.close(); - m_dictionary_file_writer.open( dictionary_path, FileWriter::OpenMode::CREATE_IF_NONEXISTENT_FOR_SEEKABLE_WRITING From d2a6836a403d64458dd4807c58c4468c41c47933 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:59:09 -0400 Subject: [PATCH 12/62] Update docstrings and comments. Also updated destructor --- components/core/src/clp/DictionaryReader.hpp | 5 ++++- components/core/src/clp/FileReader.cpp | 14 ++++++++++---- components/core/src/clp/FileReader.hpp | 3 --- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 7627c2cf9..437d24b69 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -95,12 +95,15 @@ class DictionaryReader { protected: // Methods /** - * Reads a segment's worth of IDs from the segment index + * Reads a segment's worth of IDs from the segment index from the `segment_index_decompressor` + * @param segment_index_decompressor */ void read_segment_ids(streaming_compression::Decompressor& segment_index_decompressor); /** * Reads any new entries from disk + * @param dictionary_path + * @param segment_index_path */ void read_new_entries(std::string const& dictionary_path, std::string const& segment_index_path); diff --git a/components/core/src/clp/FileReader.cpp b/components/core/src/clp/FileReader.cpp index 2182c61fa..c7952cda4 100644 --- a/components/core/src/clp/FileReader.cpp +++ b/components/core/src/clp/FileReader.cpp @@ -26,10 +26,14 @@ FileReader::FileReader(string const& path) } FileReader::~FileReader() { - // NOTE: We don't check errors for fclose since it seems the only reason it could fail is if - // it was interrupted by a signal - fclose(m_file); - free(m_getdelim_buf); + if (nullptr != m_file) { + // NOTE: We don't check errors for fclose since it seems the only reason it could fail is + // if it was interrupted by a signal + fclose(m_file); + } + if (nullptr != m_getdelim_buf) { + free(m_getdelim_buf); + } } ErrorCode FileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { @@ -74,6 +78,8 @@ FileReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, if (false == append) { str.clear(); } + // Note: If `m_getdelim_buf` is a null pointer or if `m_getdelim_buf_len` is insufficient in + // size, `getdelim` will malloc or realloc enough memory, respectively, to hold the characters. ssize_t num_bytes_read = getdelim(&m_getdelim_buf, &m_getdelim_buf_len, delim, m_file); if (num_bytes_read < 1) { if (ferror(m_file)) { diff --git a/components/core/src/clp/FileReader.hpp b/components/core/src/clp/FileReader.hpp index 7bee3efe9..42dffe0fa 100644 --- a/components/core/src/clp/FileReader.hpp +++ b/components/core/src/clp/FileReader.hpp @@ -33,7 +33,6 @@ class FileReader : public ReaderInterface { /** * Tries to get the current position of the read head in the file * @param pos Position of the read head in the file - * @return ErrorCode_NotInit if the file is not open * @return ErrorCode_errno on error * @return ErrorCode_Success on success */ @@ -41,7 +40,6 @@ class FileReader : public ReaderInterface { /** * Tries to seek from the beginning of the file to the given position * @param pos - * @return ErrorCode_NotInit if the file is not open * @return ErrorCode_errno on error * @return ErrorCode_Success on success */ @@ -52,7 +50,6 @@ class FileReader : public ReaderInterface { * @param buf * @param num_bytes_to_read The number of bytes to try and read * @param num_bytes_read The actual number of bytes read - * @return ErrorCode_NotInit if the file is not open * @return ErrorCode_BadParam if buf is invalid * @return ErrorCode_errno on error * @return ErrorCode_EndOfFile on EOF From 7d347c1464902e8febd31b3b7ab705270aa920c0 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Sat, 27 Jul 2024 21:32:57 -0400 Subject: [PATCH 13/62] Initial change --- .../core/src/clp/BufferedFileReader.cpp | 112 +++--------------- .../core/src/clp/BufferedFileReader.hpp | 35 +----- .../core/src/clp/clp/FileCompressor.cpp | 17 +-- .../core/src/clp/clp/FileCompressor.hpp | 2 +- .../core/tests/test-BufferedFileReader.cpp | 11 +- 5 files changed, 37 insertions(+), 140 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index ad6636cef..0c8be1d86 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -22,101 +22,38 @@ namespace { * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Success on success */ -auto read_into_buffer(int fd, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) - -> ErrorCode; +auto read_into_buffer(ReaderInterface& reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) +-> ErrorCode; -auto read_into_buffer(int fd, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) - -> ErrorCode { +auto read_into_buffer(ReaderInterface& reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) +-> ErrorCode { num_bytes_read = 0; - while (true) { - auto const bytes_read = ::read(fd, buf, num_bytes_to_read); - if (0 == bytes_read) { - break; - } - if (bytes_read < 0) { - return ErrorCode_errno; - } - - buf += bytes_read; - num_bytes_read += bytes_read; - num_bytes_to_read -= bytes_read; - if (num_bytes_read == num_bytes_to_read) { - return ErrorCode_Success; - } - } + auto const error_code = reader.try_read(buf, num_bytes_to_read, num_bytes_read); if (0 == num_bytes_read) { return ErrorCode_EndOfFile; } - return ErrorCode_Success; + return error_code; } } // namespace -BufferedFileReader::BufferedFileReader(size_t base_buffer_size) { +BufferedFileReader::BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size): m_file_reader{reader_interface} { if (base_buffer_size % cMinBufferSize != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } m_base_buffer_size = base_buffer_size; m_buffer.resize(m_base_buffer_size); -} -BufferedFileReader::~BufferedFileReader() { - close(); -} - -auto BufferedFileReader::try_open(string const& path) -> ErrorCode { - // Cleanup in case caller forgot to call close before calling this function - close(); - - m_fd = ::open(path.c_str(), O_RDONLY); - if (-1 == m_fd) { - if (ENOENT == errno) { - return ErrorCode_FileNotFound; - } - return ErrorCode_errno; + // TODO: anyway to get rid of this? + if (0 != m_file_reader.get_pos()) { + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - m_path = path; m_file_pos = 0; m_buffer_begin_pos = 0; m_buffer_reader.emplace(m_buffer.data(), 0); m_highest_read_pos = 0; - return ErrorCode_Success; -} - -void BufferedFileReader::open(string const& path) { - auto const error_code = try_open(path); - if (ErrorCode_Success != error_code) { - if (ErrorCode_FileNotFound == error_code) { - throw OperationFailed( - error_code, - __FILENAME__, - __LINE__, - "File not found: " + boost::filesystem::weakly_canonical(path).string() - ); - } - throw OperationFailed(error_code, __FILENAME__, __LINE__); - } -} - -auto BufferedFileReader::close() -> void { - if (-1 == m_fd) { - return; - } - - if (m_checkpoint_pos.has_value()) { - m_buffer.resize(m_base_buffer_size); - m_checkpoint_pos.reset(); - } - - // NOTE: We don't check errors for close since, in the read case, it seems the only reason it - // could fail is if it was interrupted by a signal - ::close(m_fd); - m_fd = -1; } auto BufferedFileReader::try_refill_buffer_if_empty() -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } if (m_buffer_reader->get_buffer_size() > 0) { return ErrorCode_Success; } @@ -131,10 +68,7 @@ void BufferedFileReader::refill_buffer_if_empty() { } auto BufferedFileReader::try_peek_buffered_data(char const*& buf, size_t& peek_size) const - -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } +-> ErrorCode { m_buffer_reader->peek_buffer(buf, peek_size); return ErrorCode_Success; } @@ -171,17 +105,11 @@ auto BufferedFileReader::clear_checkpoint() -> void { } auto BufferedFileReader::try_get_pos(size_t& pos) -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } pos = m_file_pos; return ErrorCode_Success; } auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } if (pos == m_file_pos) { return ErrorCode_Success; } @@ -196,9 +124,9 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (false == m_checkpoint_pos.has_value()) { // If checkpoint is not set, simply move the file_pos and invalidate // the buffer reader - auto offset = lseek(m_fd, static_cast(pos), SEEK_SET); - if (-1 == offset) { - return ErrorCode_errno; + if(auto const error_code = m_file_reader.try_seek_from_begin(pos); + error_code != ErrorCode_Success) { + return error_code; } m_buffer_reader.emplace(m_buffer.data(), 0); m_buffer_begin_pos = pos; @@ -224,10 +152,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { } auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) - -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } +-> ErrorCode { if (nullptr == buf) { return ErrorCode_BadParam; } @@ -271,9 +196,6 @@ auto BufferedFileReader::try_read_to_delimiter( bool append, string& str ) -> ErrorCode { - if (-1 == m_fd) { - return ErrorCode_NotInit; - } if (false == append) { str.clear(); } @@ -288,7 +210,7 @@ auto BufferedFileReader::try_read_to_delimiter( found_delim, num_bytes_read ); - ret_code != ErrorCode_Success && ret_code != ErrorCode_EndOfFile) + ret_code != ErrorCode_Success && ret_code != ErrorCode_EndOfFile) { return ret_code; } @@ -343,7 +265,7 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err size_t num_bytes_read{0}; auto error_code - = read_into_buffer(m_fd, &m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read); + = read_into_buffer(m_file_reader, &m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read); if (error_code != ErrorCode_Success && ErrorCode_EndOfFile != error_code) { return error_code; } diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index dd55e658e..726f2102f 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -73,11 +73,9 @@ class BufferedFileReader : public ReaderInterface { * @param base_buffer_size The size for the fixed-size buffer used when no checkpoint is set. It * must be a multiple of BufferedFileReader::cMinBufferSize. */ - explicit BufferedFileReader(size_t base_buffer_size); + explicit BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size); - BufferedFileReader() : BufferedFileReader(cDefaultBufferSize) {} - - ~BufferedFileReader(); + BufferedFileReader(ReaderInterface& reader_interface) : BufferedFileReader(reader_interface, cDefaultBufferSize) {} // Disable copy/move construction/assignment BufferedFileReader(BufferedFileReader const&) = delete; @@ -86,27 +84,8 @@ class BufferedFileReader : public ReaderInterface { auto operator=(BufferedFileReader&&) -> BufferedFileReader& = delete; // Methods - /** - * Tries to open a file - * @param path - * @return ErrorCode_Success on success - * @return ErrorCode_FileNotFound if the file was not found - * @return ErrorCode_errno otherwise - */ - [[nodiscard]] auto try_open(std::string const& path) -> ErrorCode; - - auto open(std::string const& path) -> void; - - /** - * Closes the file if it's open - */ - auto close() -> void; - - [[nodiscard]] auto get_path() const -> std::string const& { return m_path; } - /** * Tries to fill the internal buffer if it's empty - * @return ErrorCode_NotInit if the file is not opened * @return ErrorCode_errno on error reading from the underlying file * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Success on success @@ -124,7 +103,6 @@ class BufferedFileReader : public ReaderInterface { * NOTE: Any subsequent read or seek operations may invalidate the returned buffer. * @param buf Returns a pointer to the remaining content in the buffer * @param peek_size Returns the size of the remaining content in the buffer - * @return ErrorCode_NotInit if the file is not opened * @return ErrorCode_Success on success */ [[nodiscard]] auto @@ -158,7 +136,6 @@ class BufferedFileReader : public ReaderInterface { // Methods implementing the ReaderInterface /** * @param pos Returns the position of the read head in the file - * @return ErrorCode_NotInit if the file isn't open * @return ErrorCode_Success on success */ [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; @@ -168,7 +145,6 @@ class BufferedFileReader : public ReaderInterface { * is set, callers can only seek forwards in the file; When a checkpoint is set, callers can * seek to any position in the file that's after and including the checkpoint. * @param pos - * @return ErrorCode_NotInit if the file isn't open * @return ErrorCode_Unsupported if a checkpoint is set and the requested position is less than * the checkpoint, or no checkpoint is set and the requested position is less the current read * head's position. @@ -185,7 +161,6 @@ class BufferedFileReader : public ReaderInterface { * @param buf * @param num_bytes_to_read The number of bytes to try and read * @param num_bytes_read The actual number of bytes read - * @return ErrorCode_NotInit if the file is not open * @return ErrorCode_BadParam if buf is null * @return ErrorCode_errno on error reading from the underlying file * @return ErrorCode_EndOfFile on EOF @@ -200,7 +175,6 @@ class BufferedFileReader : public ReaderInterface { * @param keep_delimiter Whether to include the delimiter in the output string * @param append Whether to append to the given string or replace its contents * @param str Returns the content read - * @return ErrorCode_NotInit if the file is not open * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_errno on error reading from the underlying file * @return Same as BufferReader::try_read_to_delimiter if it fails @@ -248,10 +222,11 @@ class BufferedFileReader : public ReaderInterface { static constexpr size_t cDefaultBufferSize = (16 * cMinBufferSize); // Variables - int m_fd{-1}; - std::string m_path; size_t m_file_pos{0}; + // ReaderInterfaceSpecific + ReaderInterface& m_file_reader; + // Buffer specific data std::vector m_buffer; size_t m_base_buffer_size; diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index 9898602cc..3a3375866 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -121,10 +121,11 @@ bool FileCompressor::compress_file( PROFILER_SPDLOG_INFO("Start parsing {}", file_name) Profiler::start_continuous_measurement(); - m_file_reader.open(file_to_compress.get_path()); + FileReader file_reader{file_to_compress.get_path()}; + BufferedFileReader buffered_file_reader{file_reader}; // Check that file is UTF-8 encoded - if (auto error_code = m_file_reader.try_refill_buffer_if_empty(); + if (auto error_code = buffered_file_reader.try_refill_buffer_if_empty(); ErrorCode_Success != error_code && ErrorCode_EndOfFile != error_code) { if (ErrorCode_errno == error_code) { @@ -144,7 +145,7 @@ bool FileCompressor::compress_file( } char const* utf8_validation_buf{nullptr}; size_t peek_size{0}; - m_file_reader.peek_buffered_data(utf8_validation_buf, peek_size); + buffered_file_reader.peek_buffered_data(utf8_validation_buf, peek_size); bool succeeded = true; auto const utf8_validation_buf_len = std::min(peek_size, cUtfMaxValidationLen); if (is_utf8_encoded({utf8_validation_buf, utf8_validation_buf_len})) { @@ -156,7 +157,7 @@ bool FileCompressor::compress_file( file_to_compress.get_path_for_compression(), file_to_compress.get_group_id(), archive_writer, - m_file_reader + buffered_file_reader ); } else { parse_and_encode_with_library( @@ -166,7 +167,7 @@ bool FileCompressor::compress_file( file_to_compress.get_path_for_compression(), file_to_compress.get_group_id(), archive_writer, - m_file_reader + buffered_file_reader ); } } else { @@ -177,6 +178,7 @@ bool FileCompressor::compress_file( target_encoded_file_size, file_to_compress, archive_writer, + buffered_file_reader, use_heuristic )) { @@ -184,8 +186,6 @@ bool FileCompressor::compress_file( } } - m_file_reader.close(); - Profiler::stop_continuous_measurement(); LOG_CONTINUOUS_MEASUREMENT(Profiler::ContinuousMeasurementIndex::ParseLogFile) PROFILER_SPDLOG_INFO("Done parsing {}", file_name) @@ -274,6 +274,7 @@ bool FileCompressor::try_compressing_as_archive( size_t target_encoded_file_size, FileToCompress const& file_to_compress, streaming_archive::writer::Archive& archive_writer, + ReaderInterface& file_reader, bool use_heuristic ) { auto file_boost_path = boost::filesystem::path(file_to_compress.get_path_for_compression()); @@ -289,7 +290,7 @@ bool FileCompressor::try_compressing_as_archive( } // Check if it's an archive - auto error_code = m_libarchive_reader.try_open(m_file_reader, filename_if_compressed); + auto error_code = m_libarchive_reader.try_open(file_reader, filename_if_compressed); if (ErrorCode_Success != error_code) { SPDLOG_ERROR( "Cannot compress {} - failed to open with libarchive.", diff --git a/components/core/src/clp/clp/FileCompressor.hpp b/components/core/src/clp/clp/FileCompressor.hpp index b8b6c55fd..bfd629580 100644 --- a/components/core/src/clp/clp/FileCompressor.hpp +++ b/components/core/src/clp/clp/FileCompressor.hpp @@ -100,6 +100,7 @@ class FileCompressor { size_t target_encoded_file_size, FileToCompress const& file_to_compress, streaming_archive::writer::Archive& archive_writer, + ReaderInterface& file_reader, bool use_heuristic ); @@ -150,7 +151,6 @@ class FileCompressor { // Variables boost::uuids::random_generator& m_uuid_generator; - BufferedFileReader m_file_reader; LibarchiveReader m_libarchive_reader; LibarchiveFileReader m_libarchive_file_reader; MessageParser m_message_parser; diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index f182a777c..d236e48e2 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -11,6 +11,7 @@ using clp::BufferedFileReader; using clp::ErrorCode_EndOfFile; using clp::ErrorCode_Success; using clp::ErrorCode_Unsupported; +using clp::FileReader; using clp::FileWriter; static constexpr size_t cNumAlphabets = 'z' - 'a'; @@ -34,8 +35,8 @@ TEST_CASE("Test reading data", "[BufferedFileReader]") { auto read_buf_uniq_ptr = std::make_unique>(); auto& read_buf = *read_buf_uniq_ptr; size_t const base_buffer_size = BufferedFileReader::cMinBufferSize << 4; - BufferedFileReader reader{base_buffer_size}; - reader.open(test_file_path); + FileReader file_reader{test_file_path}; + BufferedFileReader reader{file_reader, base_buffer_size}; size_t num_bytes_read{0}; size_t buf_pos{0}; @@ -253,7 +254,6 @@ TEST_CASE("Test reading data", "[BufferedFileReader]") { REQUIRE(reader.get_pos() == buf_pos); } - reader.close(); boost::filesystem::remove(test_file_path); } @@ -273,8 +273,8 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_writer.write(test_data.data(), test_data_size); file_writer.close(); - BufferedFileReader file_reader; - file_reader.open(test_file_path); + FileReader file_reader{test_file_path}; + BufferedFileReader reader{file_reader}; std::string test_string; clp::FileReader ref_file_reader{test_file_path}; @@ -291,6 +291,5 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { REQUIRE(test_string == ref_string); } - file_reader.close(); boost::filesystem::remove(test_file_path); } From 63f0cfd31d1530ad817a54272d591940610e723d Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:24:26 -0400 Subject: [PATCH 14/62] Linter fix --- .../core/src/clp/BufferedFileReader.cpp | 42 ++++++++++++------- .../core/src/clp/BufferedFileReader.hpp | 3 +- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 0c8be1d86..ecab4ce8e 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -22,13 +22,21 @@ namespace { * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Success on success */ -auto read_into_buffer(ReaderInterface& reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) --> ErrorCode; - -auto read_into_buffer(ReaderInterface& reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) --> ErrorCode { +auto read_into_buffer( + ReaderInterface& reader, + char* buf, + size_t num_bytes_to_read, + size_t& num_bytes_read +) -> ErrorCode; + +auto read_into_buffer( + ReaderInterface& reader, + char* buf, + size_t num_bytes_to_read, + size_t& num_bytes_read +) -> ErrorCode { num_bytes_read = 0; - auto const error_code = reader.try_read(buf, num_bytes_to_read, num_bytes_read); + auto const error_code = reader.try_read(buf, num_bytes_to_read, num_bytes_read); if (0 == num_bytes_read) { return ErrorCode_EndOfFile; } @@ -36,7 +44,8 @@ auto read_into_buffer(ReaderInterface& reader, char* buf, size_t num_bytes_to_re } } // namespace -BufferedFileReader::BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size): m_file_reader{reader_interface} { +BufferedFileReader::BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size) + : m_file_reader{reader_interface} { if (base_buffer_size % cMinBufferSize != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } @@ -68,7 +77,7 @@ void BufferedFileReader::refill_buffer_if_empty() { } auto BufferedFileReader::try_peek_buffered_data(char const*& buf, size_t& peek_size) const --> ErrorCode { + -> ErrorCode { m_buffer_reader->peek_buffer(buf, peek_size); return ErrorCode_Success; } @@ -124,8 +133,9 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (false == m_checkpoint_pos.has_value()) { // If checkpoint is not set, simply move the file_pos and invalidate // the buffer reader - if(auto const error_code = m_file_reader.try_seek_from_begin(pos); - error_code != ErrorCode_Success) { + if (auto const error_code = m_file_reader.try_seek_from_begin(pos); + error_code != ErrorCode_Success) + { return error_code; } m_buffer_reader.emplace(m_buffer.data(), 0); @@ -152,7 +162,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { } auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) --> ErrorCode { + -> ErrorCode { if (nullptr == buf) { return ErrorCode_BadParam; } @@ -210,7 +220,7 @@ auto BufferedFileReader::try_read_to_delimiter( found_delim, num_bytes_read ); - ret_code != ErrorCode_Success && ret_code != ErrorCode_EndOfFile) + ret_code != ErrorCode_Success && ret_code != ErrorCode_EndOfFile) { return ret_code; } @@ -264,8 +274,12 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err } size_t num_bytes_read{0}; - auto error_code - = read_into_buffer(m_file_reader, &m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read); + auto error_code = read_into_buffer( + m_file_reader, + &m_buffer[next_buffer_pos], + num_bytes_to_read, + num_bytes_read + ); if (error_code != ErrorCode_Success && ErrorCode_EndOfFile != error_code) { return error_code; } diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 726f2102f..53c9ba0dc 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -75,7 +75,8 @@ class BufferedFileReader : public ReaderInterface { */ explicit BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size); - BufferedFileReader(ReaderInterface& reader_interface) : BufferedFileReader(reader_interface, cDefaultBufferSize) {} + BufferedFileReader(ReaderInterface& reader_interface) + : BufferedFileReader(reader_interface, cDefaultBufferSize) {} // Disable copy/move construction/assignment BufferedFileReader(BufferedFileReader const&) = delete; From 71920228232233941671d3f78da793463fa0709c Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:54:38 -0400 Subject: [PATCH 15/62] Factor out duplicated function --- .../core/src/clp/clp/FileCompressor.cpp | 95 +++++++++++-------- .../core/src/clp/clp/FileCompressor.hpp | 11 +++ 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index 3a3375866..e338b1f62 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -149,27 +149,16 @@ bool FileCompressor::compress_file( bool succeeded = true; auto const utf8_validation_buf_len = std::min(peek_size, cUtfMaxValidationLen); if (is_utf8_encoded({utf8_validation_buf, utf8_validation_buf_len})) { - if (use_heuristic) { - parse_and_encode_with_heuristic( - target_data_size_of_dicts, - archive_user_config, - target_encoded_file_size, - file_to_compress.get_path_for_compression(), - file_to_compress.get_group_id(), - archive_writer, - buffered_file_reader - ); - } else { - parse_and_encode_with_library( - target_data_size_of_dicts, - archive_user_config, - target_encoded_file_size, - file_to_compress.get_path_for_compression(), - file_to_compress.get_group_id(), - archive_writer, - buffered_file_reader - ); - } + parse_and_encode( + target_data_size_of_dicts, + archive_user_config, + target_encoded_file_size, + file_to_compress.get_path_for_compression(), + file_to_compress.get_group_id(), + archive_writer, + buffered_file_reader, + use_heuristic + ); } else { if (false == try_compressing_as_archive( @@ -193,6 +182,39 @@ bool FileCompressor::compress_file( return succeeded; } +auto FileCompressor::parse_and_encode( + size_t target_data_size_of_dicts, + streaming_archive::writer::Archive::UserConfig& archive_user_config, + size_t target_encoded_file_size, + string const& path_for_compression, + group_id_t group_id, + streaming_archive::writer::Archive& archive_writer, + ReaderInterface& reader, + bool use_heuristic +) -> void { + if (use_heuristic) { + parse_and_encode_with_heuristic( + target_data_size_of_dicts, + archive_user_config, + target_encoded_file_size, + path_for_compression, + group_id, + archive_writer, + reader + ); + } else { + parse_and_encode_with_library( + target_data_size_of_dicts, + archive_user_config, + target_encoded_file_size, + path_for_compression, + group_id, + archive_writer, + reader + ); + } +} + void FileCompressor::parse_and_encode_with_library( size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config, @@ -364,27 +386,16 @@ bool FileCompressor::try_compressing_as_archive( auto const utf8_validation_buf_len = std::min(peek_size, cUtfMaxValidationLen); if (is_utf8_encoded({utf8_validation_buf, utf8_validation_buf_len})) { auto boost_path_for_compression = parent_boost_path / file_path; - if (use_heuristic) { - parse_and_encode_with_heuristic( - target_data_size_of_dicts, - archive_user_config, - target_encoded_file_size, - boost_path_for_compression.string(), - file_to_compress.get_group_id(), - archive_writer, - m_libarchive_file_reader - ); - } else { - parse_and_encode_with_library( - target_data_size_of_dicts, - archive_user_config, - target_encoded_file_size, - boost_path_for_compression.string(), - file_to_compress.get_group_id(), - archive_writer, - m_libarchive_file_reader - ); - } + parse_and_encode( + target_data_size_of_dicts, + archive_user_config, + target_encoded_file_size, + boost_path_for_compression.string(), + file_to_compress.get_group_id(), + archive_writer, + m_libarchive_file_reader, + use_heuristic + ); } else if (has_ir_stream_magic_number({utf8_validation_buf, peek_size})) { // Remove .clp suffix if found static constexpr char cIrStreamExtension[] = ".clp"; diff --git a/components/core/src/clp/clp/FileCompressor.hpp b/components/core/src/clp/clp/FileCompressor.hpp index bfd629580..b857ca2f9 100644 --- a/components/core/src/clp/clp/FileCompressor.hpp +++ b/components/core/src/clp/clp/FileCompressor.hpp @@ -54,6 +54,17 @@ class FileCompressor { static constexpr size_t cUtfMaxValidationLen = 4096; // Methods + auto parse_and_encode( + size_t target_data_size_of_dicts, + streaming_archive::writer::Archive::UserConfig& archive_user_config, + size_t target_encoded_file_size, + std::string const& path_for_compression, + group_id_t group_id, + streaming_archive::writer::Archive& archive_writer, + ReaderInterface& reader, + bool use_heuristic + ) -> void; + /** * Parses and encodes content from the given reader into the given archive_writer * @param target_data_size_of_dicts From f6b6757908e26105dc4c1d63d0f95203d30688fe Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:06:49 -0400 Subject: [PATCH 16/62] Add sysfile Reader --- components/core/src/clp/SysFileReader.cpp | 95 +++++++++++++++++++++++ components/core/src/clp/SysFileReader.hpp | 85 ++++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 components/core/src/clp/SysFileReader.cpp create mode 100644 components/core/src/clp/SysFileReader.hpp diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp new file mode 100644 index 000000000..d69bd3395 --- /dev/null +++ b/components/core/src/clp/SysFileReader.cpp @@ -0,0 +1,95 @@ +#include "SysFileReader.hpp" + +#include +#include +#include +#include + +#include +#include + +#include + +using std::string; + +namespace clp { +SysFileReader::SysFileReader(string const& path) : m_fd{::open(path.c_str(), O_RDONLY)} { + if (-1 == m_fd) { + if (ENOENT == errno) { + throw OperationFailed(ErrorCode_FileNotFound, __FILE__, __LINE__); + } + throw OperationFailed(ErrorCode_errno, __FILE__, __LINE__); + } + m_path = path; +} + +SysFileReader::~SysFileReader() { + if (-1 != m_fd) { + // NOTE: We don't check errors for fclose since it seems the only reason it could fail is + // if it was interrupted by a signal + ::close(m_fd); + } +} + +SysFileReader::SysFileReader(SysFileReader&& other) + : m_fd(other.m_fd), + m_path(std::move(other.m_path)) { + if (-1 != other.m_fd) { + other.m_fd = -1; + } +} + +auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) + -> ErrorCode { + if (nullptr == buf) { + return ErrorCode_BadParam; + } + + num_bytes_read = 0; + while (true) { + auto const bytes_read = ::read(m_fd, buf, num_bytes_to_read); + if (0 == bytes_read) { + break; + } + if (bytes_read < 0) { + return ErrorCode_errno; + } + + buf += bytes_read; + num_bytes_read += bytes_read; + num_bytes_to_read -= bytes_read; + if (num_bytes_read == num_bytes_to_read) { + return ErrorCode_Success; + } + } + if (0 == num_bytes_read) { + return ErrorCode_EndOfFile; + } + return ErrorCode_Success; +} + +auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { + if (auto const offset = lseek(m_fd, static_cast(pos), SEEK_SET); -1 == offset) { + return ErrorCode_errno; + } + + return ErrorCode_Success; +} + +auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { + pos = lseek(m_fd, 0, SEEK_CUR); + if ((off_t)-1 == pos) { + return ErrorCode_errno; + } + + return ErrorCode_Success; +} + +auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { + auto return_value = fstat(m_fd, &stat_buffer); + if (0 != return_value) { + return ErrorCode_errno; + } + return ErrorCode_Success; +} +} // namespace clp diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp new file mode 100644 index 000000000..d475dfb77 --- /dev/null +++ b/components/core/src/clp/SysFileReader.hpp @@ -0,0 +1,85 @@ +#ifndef CLP_SYSFILEREADER_HPP +#define CLP_SYSFILEREADER_HPP + +#include + +#include +#include + +#include "Defs.h" +#include "ErrorCode.hpp" +#include "ReaderInterface.hpp" +#include "TraceableException.hpp" + +namespace clp { +class SysFileReader : public ReaderInterface { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + + // Methods + char const* what() const noexcept override { return "FileReader operation failed"; } + }; + + SysFileReader(std::string const& path); + + ~SysFileReader(); + + // Explicitly disable copy constructor and assignment operator + SysFileReader(SysFileReader const&) = delete; + SysFileReader& operator=(SysFileReader const&) = delete; + + // Move constructor and assignment operator + SysFileReader(SysFileReader&&); + SysFileReader& operator=(SysFileReader&&) = delete; + + // Methods implementing the ReaderInterface + /** + * Tries to get the current position of the read head in the file + * @param pos Position of the read head in the file + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; + /** + * Tries to seek from the beginning of the file to the given position + * @param pos + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; + + /** + * Tries to read up to a given number of bytes from the file + * @param buf + * @param num_bytes_to_read The number of bytes to try and read + * @param num_bytes_read The actual number of bytes read + * @return ErrorCode_BadParam if buf is invalid + * @return ErrorCode_errno on error + * @return ErrorCode_EndOfFile on EOF + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto + try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; + + [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } + + /** + * Tries to stat the current file + * @param stat_buffer + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; + +private: + int m_fd{-1}; + std::string m_path; +}; +} // namespace clp + +#endif // CLP_SYSFILEREADER_HPP From 712d10bef0abf3a27b98720a304ae51642318a39 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:07:20 -0400 Subject: [PATCH 17/62] Update BufferedFileReader to use move instead of reference --- components/core/CMakeLists.txt | 2 ++ .../core/src/clp/BufferedFileReader.cpp | 20 +++++++----- .../core/src/clp/BufferedFileReader.hpp | 11 ++++--- components/core/src/clp/clp/CMakeLists.txt | 2 ++ .../core/src/clp/clp/FileCompressor.cpp | 10 +++--- .../core/src/clp/clp/FileCompressor.hpp | 16 +++++----- .../core/tests/test-BufferedFileReader.cpp | 32 +++++++++++-------- 7 files changed, 55 insertions(+), 38 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index f0cd327ef..18eb2edca 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -446,6 +446,8 @@ set(SOURCE_FILES_unitTest src/clp/streaming_compression/zstd/Decompressor.hpp src/clp/StringReader.cpp src/clp/StringReader.hpp + src/clp/SysFileReader.cpp + src/clp/SysFileReader.hpp src/clp/Thread.cpp src/clp/Thread.hpp src/clp/time_types.hpp diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index ecab4ce8e..74275a01a 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -6,6 +6,7 @@ #include +#include "../glt/streaming_archive/writer/Archive.hpp" #include "math_utils.hpp" using std::string; @@ -23,20 +24,20 @@ namespace { * @return ErrorCode_Success on success */ auto read_into_buffer( - ReaderInterface& reader, + ReaderInterface* reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read ) -> ErrorCode; auto read_into_buffer( - ReaderInterface& reader, + ReaderInterface* reader, char* buf, size_t num_bytes_to_read, size_t& num_bytes_read ) -> ErrorCode { num_bytes_read = 0; - auto const error_code = reader.try_read(buf, num_bytes_to_read, num_bytes_read); + auto const error_code = reader->try_read(buf, num_bytes_to_read, num_bytes_read); if (0 == num_bytes_read) { return ErrorCode_EndOfFile; } @@ -44,8 +45,11 @@ auto read_into_buffer( } } // namespace -BufferedFileReader::BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size) - : m_file_reader{reader_interface} { +BufferedFileReader::BufferedFileReader( + std::unique_ptr reader_interface, + size_t base_buffer_size +) + : m_file_reader(std::move(reader_interface)) { if (base_buffer_size % cMinBufferSize != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } @@ -53,7 +57,7 @@ BufferedFileReader::BufferedFileReader(ReaderInterface& reader_interface, size_t m_buffer.resize(m_base_buffer_size); // TODO: anyway to get rid of this? - if (0 != m_file_reader.get_pos()) { + if (0 != m_file_reader->get_pos()) { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } m_file_pos = 0; @@ -133,7 +137,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (false == m_checkpoint_pos.has_value()) { // If checkpoint is not set, simply move the file_pos and invalidate // the buffer reader - if (auto const error_code = m_file_reader.try_seek_from_begin(pos); + if (auto const error_code = m_file_reader->try_seek_from_begin(pos); error_code != ErrorCode_Success) { return error_code; @@ -275,7 +279,7 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err size_t num_bytes_read{0}; auto error_code = read_into_buffer( - m_file_reader, + m_file_reader.get(), &m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 53c9ba0dc..56b0c2b08 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -73,10 +73,13 @@ class BufferedFileReader : public ReaderInterface { * @param base_buffer_size The size for the fixed-size buffer used when no checkpoint is set. It * must be a multiple of BufferedFileReader::cMinBufferSize. */ - explicit BufferedFileReader(ReaderInterface& reader_interface, size_t base_buffer_size); + explicit BufferedFileReader( + std::unique_ptr reader_interface, + size_t base_buffer_size + ); - BufferedFileReader(ReaderInterface& reader_interface) - : BufferedFileReader(reader_interface, cDefaultBufferSize) {} + BufferedFileReader(std::unique_ptr reader_interface) + : BufferedFileReader(std::move(reader_interface), cDefaultBufferSize) {} // Disable copy/move construction/assignment BufferedFileReader(BufferedFileReader const&) = delete; @@ -226,7 +229,7 @@ class BufferedFileReader : public ReaderInterface { size_t m_file_pos{0}; // ReaderInterfaceSpecific - ReaderInterface& m_file_reader; + std::unique_ptr m_file_reader; // Buffer specific data std::vector m_buffer; diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index f3a74ac78..e33addc74 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -128,6 +128,8 @@ set( ../streaming_compression/zstd/Decompressor.hpp ../StringReader.cpp ../StringReader.hpp + ../SysFileReader.cpp + ../SysFileReader.hpp ../time_types.hpp ../TimestampPattern.cpp ../TimestampPattern.hpp diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index e338b1f62..c676e93a8 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -32,8 +32,11 @@ using log_surgeon::Reader; using log_surgeon::ReaderParser; using std::cout; using std::endl; +using std::make_unique; +using std::move; using std::set; using std::string; +using std::unique_ptr; using std::vector; // Local prototypes @@ -116,13 +119,12 @@ bool FileCompressor::compress_file( streaming_archive::writer::Archive& archive_writer, bool use_heuristic ) { - std::string file_name = std::filesystem::canonical(file_to_compress.get_path()).string(); + string file_name = std::filesystem::canonical(file_to_compress.get_path()).string(); PROFILER_SPDLOG_INFO("Start parsing {}", file_name) Profiler::start_continuous_measurement(); - FileReader file_reader{file_to_compress.get_path()}; - BufferedFileReader buffered_file_reader{file_reader}; + BufferedFileReader buffered_file_reader(make_unique(file_to_compress.get_path())); // Check that file is UTF-8 encoded if (auto error_code = buffered_file_reader.try_refill_buffer_if_empty(); @@ -304,7 +306,7 @@ bool FileCompressor::try_compressing_as_archive( // Determine path without extension (used if file is a single compressed file, e.g., syslog.gz // -> syslog) - std::string filename_if_compressed; + string filename_if_compressed; if (file_boost_path.has_stem()) { filename_if_compressed = file_boost_path.stem().string(); } else { diff --git a/components/core/src/clp/clp/FileCompressor.hpp b/components/core/src/clp/clp/FileCompressor.hpp index b857ca2f9..24949fb99 100644 --- a/components/core/src/clp/clp/FileCompressor.hpp +++ b/components/core/src/clp/clp/FileCompressor.hpp @@ -55,14 +55,14 @@ class FileCompressor { // Methods auto parse_and_encode( - size_t target_data_size_of_dicts, - streaming_archive::writer::Archive::UserConfig& archive_user_config, - size_t target_encoded_file_size, - std::string const& path_for_compression, - group_id_t group_id, - streaming_archive::writer::Archive& archive_writer, - ReaderInterface& reader, - bool use_heuristic + size_t target_data_size_of_dicts, + streaming_archive::writer::Archive::UserConfig& archive_user_config, + size_t target_encoded_file_size, + std::string const& path_for_compression, + group_id_t group_id, + streaming_archive::writer::Archive& archive_writer, + ReaderInterface& reader, + bool use_heuristic ) -> void; /** diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index d236e48e2..862677870 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -6,37 +6,42 @@ #include "../src/clp/BufferedFileReader.hpp" #include "../src/clp/FileReader.hpp" #include "../src/clp/FileWriter.hpp" +#include "../src/clp/SysFileReader.hpp" using clp::BufferedFileReader; +using clp::ErrorCode; using clp::ErrorCode_EndOfFile; using clp::ErrorCode_Success; using clp::ErrorCode_Unsupported; using clp::FileReader; using clp::FileWriter; +using clp::ReaderInterface; +using clp::SysFileReader; +using std::make_unique; +using std::string; static constexpr size_t cNumAlphabets = 'z' - 'a'; TEST_CASE("Test reading data", "[BufferedFileReader]") { // Initialize data for testing size_t const test_data_size = 4L * 1024 * 1024 + 1; // 4MB + 1 - auto test_data_uniq_ptr = std::make_unique>(); + auto test_data_uniq_ptr = make_unique>(); auto& test_data = *test_data_uniq_ptr; for (size_t i = 0; i < test_data.size(); ++i) { test_data[i] = static_cast('a' + (i % (cNumAlphabets))); } - std::string const test_file_path{"BufferedFileReader.test"}; + string const test_file_path{"BufferedFileReader.test"}; // Write to test file FileWriter file_writer; file_writer.open(test_file_path, FileWriter::OpenMode::CREATE_FOR_WRITING); file_writer.write(test_data.cbegin(), test_data_size); file_writer.close(); - auto read_buf_uniq_ptr = std::make_unique>(); + auto read_buf_uniq_ptr = make_unique>(); auto& read_buf = *read_buf_uniq_ptr; size_t const base_buffer_size = BufferedFileReader::cMinBufferSize << 4; - FileReader file_reader{test_file_path}; - BufferedFileReader reader{file_reader, base_buffer_size}; + BufferedFileReader reader(make_unique(test_file_path), base_buffer_size); size_t num_bytes_read{0}; size_t buf_pos{0}; @@ -260,33 +265,32 @@ TEST_CASE("Test reading data", "[BufferedFileReader]") { TEST_CASE("Test delimiter", "[BufferedFileReader]") { // Initialize data for testing size_t const test_data_size = 1L * 1024 * 1024 + 1; // 1MB - auto test_data_uniq_ptr = std::make_unique>(); + auto test_data_uniq_ptr = make_unique>(); auto& test_data = *test_data_uniq_ptr; for (size_t i = 0; i < test_data.size(); ++i) { test_data[i] = static_cast('a' + (std::rand() % (cNumAlphabets))); } // Write to test file - std::string const test_file_path{"BufferedFileReader.delimiter.test"}; + string const test_file_path{"BufferedFileReader.delimiter.test"}; FileWriter file_writer; file_writer.open(test_file_path, FileWriter::OpenMode::CREATE_FOR_WRITING); file_writer.write(test_data.data(), test_data_size); file_writer.close(); - FileReader file_reader{test_file_path}; - BufferedFileReader reader{file_reader}; - std::string test_string; + BufferedFileReader reader(make_unique(test_file_path)); + string test_string; - clp::FileReader ref_file_reader{test_file_path}; - std::string ref_string; + FileReader ref_file_reader{test_file_path}; + string ref_string; // Validate that a FileReader and a BufferedFileReader return the same strings (split by // delimiters) - clp::ErrorCode error_code{ErrorCode_Success}; + ErrorCode error_code{ErrorCode_Success}; auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); while (ErrorCode_EndOfFile != error_code) { error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); - auto error_code2 = file_reader.try_read_to_delimiter(delimiter, true, false, test_string); + auto error_code2 = reader.try_read_to_delimiter(delimiter, true, false, test_string); REQUIRE(error_code2 == error_code); REQUIRE(test_string == ref_string); } From 80532eacba33c4115ab1d7c8ae346349fd553fa5 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:28:10 -0400 Subject: [PATCH 18/62] FileDescriptor update --- components/core/src/clp/FileDescriptor.cpp | 7 +++++ components/core/src/clp/FileDescriptor.hpp | 6 ++-- components/core/src/clp/SysFileReader.cpp | 36 +++++----------------- components/core/src/clp/SysFileReader.hpp | 13 ++++---- 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/components/core/src/clp/FileDescriptor.cpp b/components/core/src/clp/FileDescriptor.cpp index 2e17bfe05..31eb906a9 100644 --- a/components/core/src/clp/FileDescriptor.cpp +++ b/components/core/src/clp/FileDescriptor.cpp @@ -47,6 +47,13 @@ FileDescriptor::~FileDescriptor() { } } +FileDescriptor::FileDescriptor(FileDescriptor&& other) + : m_fd{other.m_fd}, + m_open_mode{other.m_open_mode}, + m_close_failure_callback{other.m_close_failure_callback} { + other.m_fd = -1; +} + auto FileDescriptor::get_size() const -> size_t { struct stat stat_result {}; diff --git a/components/core/src/clp/FileDescriptor.hpp b/components/core/src/clp/FileDescriptor.hpp index a704326bf..c1784cf33 100644 --- a/components/core/src/clp/FileDescriptor.hpp +++ b/components/core/src/clp/FileDescriptor.hpp @@ -62,9 +62,11 @@ class FileDescriptor { // Destructor ~FileDescriptor(); - // Disable copy/move constructors/assignment operators + // Move constructor + FileDescriptor(FileDescriptor&&); + + // Disable copy constructors and move/copy assignment operators FileDescriptor(FileDescriptor const&) = delete; - FileDescriptor(FileDescriptor&&) = delete; auto operator=(FileDescriptor const&) -> FileDescriptor& = delete; auto operator=(FileDescriptor&&) -> FileDescriptor& = delete; diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index d69bd3395..6c97e3957 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -13,31 +13,9 @@ using std::string; namespace clp { -SysFileReader::SysFileReader(string const& path) : m_fd{::open(path.c_str(), O_RDONLY)} { - if (-1 == m_fd) { - if (ENOENT == errno) { - throw OperationFailed(ErrorCode_FileNotFound, __FILE__, __LINE__); - } - throw OperationFailed(ErrorCode_errno, __FILE__, __LINE__); - } - m_path = path; -} - -SysFileReader::~SysFileReader() { - if (-1 != m_fd) { - // NOTE: We don't check errors for fclose since it seems the only reason it could fail is - // if it was interrupted by a signal - ::close(m_fd); - } -} - SysFileReader::SysFileReader(SysFileReader&& other) - : m_fd(other.m_fd), - m_path(std::move(other.m_path)) { - if (-1 != other.m_fd) { - other.m_fd = -1; - } -} + : m_fd{std::move(other.m_fd)}, + m_path{std::move(other.m_path)} {} auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode { @@ -47,7 +25,7 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by num_bytes_read = 0; while (true) { - auto const bytes_read = ::read(m_fd, buf, num_bytes_to_read); + auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); if (0 == bytes_read) { break; } @@ -69,7 +47,9 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by } auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { - if (auto const offset = lseek(m_fd, static_cast(pos), SEEK_SET); -1 == offset) { + if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); + -1 == offset) + { return ErrorCode_errno; } @@ -77,7 +57,7 @@ auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { } auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { - pos = lseek(m_fd, 0, SEEK_CUR); + pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); if ((off_t)-1 == pos) { return ErrorCode_errno; } @@ -86,7 +66,7 @@ auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { } auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - auto return_value = fstat(m_fd, &stat_buffer); + auto return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); if (0 != return_value) { return ErrorCode_errno; } diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index d475dfb77..a60f12c58 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -3,11 +3,10 @@ #include -#include -#include +#include -#include "Defs.h" #include "ErrorCode.hpp" +#include "FileDescriptor.hpp" #include "ReaderInterface.hpp" #include "TraceableException.hpp" @@ -25,9 +24,9 @@ class SysFileReader : public ReaderInterface { char const* what() const noexcept override { return "FileReader operation failed"; } }; - SysFileReader(std::string const& path); - - ~SysFileReader(); + SysFileReader(std::string_view const& path) + : m_fd{path, FileDescriptor::OpenMode::ReadOnly}, + m_path{path} {} // Explicitly disable copy constructor and assignment operator SysFileReader(SysFileReader const&) = delete; @@ -77,7 +76,7 @@ class SysFileReader : public ReaderInterface { [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; private: - int m_fd{-1}; + FileDescriptor m_fd; std::string m_path; }; } // namespace clp From 4dabc96ce7415354b97977d934cf9bb7bd55749f Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 12 Aug 2024 18:11:25 -0400 Subject: [PATCH 19/62] Revert unintended changes --- components/core/src/clp/DictionaryReader.hpp | 110 ++++++++---------- components/core/src/clp/DictionaryWriter.hpp | 5 +- .../core/src/clp/LogTypeDictionaryWriter.cpp | 2 + .../core/src/clp/VariableDictionaryWriter.cpp | 1 + components/core/src/clp/clg/CMakeLists.txt | 2 + components/core/src/clp/clo/CMakeLists.txt | 2 + components/core/src/clp/clp/CMakeLists.txt | 2 + components/core/src/clp/clp/decompression.cpp | 1 + components/core/src/clp/dictionary_utils.cpp | 55 +++++++++ components/core/src/clp/dictionary_utils.hpp | 26 +++++ .../make_dictionaries_readable/CMakeLists.txt | 2 + .../clp/streaming_archive/reader/Archive.cpp | 5 + .../clp/streaming_archive/reader/Archive.hpp | 1 + 13 files changed, 152 insertions(+), 62 deletions(-) create mode 100644 components/core/src/clp/dictionary_utils.cpp create mode 100644 components/core/src/clp/dictionary_utils.hpp diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 437d24b69..5b8a007a1 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -7,6 +7,7 @@ #include #include +#include "dictionary_utils.hpp" #include "DictionaryEntry.hpp" #include "FileReader.hpp" #include "streaming_compression/passthrough/Decompressor.hpp" @@ -53,6 +54,11 @@ class DictionaryReader { */ void close(); + /** + * Reads any new entries from disk + */ + void read_new_entries(); + /** * Gets the dictionary's entries * @return All dictionary entries @@ -95,21 +101,23 @@ class DictionaryReader { protected: // Methods /** - * Reads a segment's worth of IDs from the segment index from the `segment_index_decompressor` - * @param segment_index_decompressor - */ - void read_segment_ids(streaming_compression::Decompressor& segment_index_decompressor); - - /** - * Reads any new entries from disk - * @param dictionary_path - * @param segment_index_path + * Reads a segment's worth of IDs from the segment index */ - void - read_new_entries(std::string const& dictionary_path, std::string const& segment_index_path); + void read_segment_ids(); // Variables bool m_is_open; + std::unique_ptr m_dictionary_file_reader; + std::unique_ptr m_segment_index_file_reader; +#if USE_PASSTHROUGH_COMPRESSION + streaming_compression::passthrough::Decompressor m_dictionary_decompressor; + streaming_compression::passthrough::Decompressor m_segment_index_decompressor; +#elif USE_ZSTD_COMPRESSION + streaming_compression::zstd::Decompressor m_dictionary_decompressor; + streaming_compression::zstd::Decompressor m_segment_index_decompressor; +#else + static_assert(false, "Unsupported compression mode."); +#endif size_t m_num_segments_read_from_index; std::vector m_entries; }; @@ -123,7 +131,17 @@ void DictionaryReader::open( throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); } - read_new_entries(dictionary_path, segment_index_path); + constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB + + open_dictionary_for_reading( + dictionary_path, + segment_index_path, + cDecompressorFileReadBufferCapacity, + m_dictionary_file_reader, + m_dictionary_decompressor, + m_segment_index_file_reader, + m_segment_index_decompressor + ); m_is_open = true; } @@ -134,6 +152,11 @@ void DictionaryReader::close() { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } + m_segment_index_decompressor.close(); + m_segment_index_file_reader.reset(); + m_dictionary_decompressor.close(); + m_dictionary_file_reader.reset(); + m_num_segments_read_from_index = 0; m_entries.clear(); @@ -141,20 +164,13 @@ void DictionaryReader::close() { } template -void DictionaryReader::read_new_entries( - std::string const& dictionary_path, - std::string const& segment_index_path -) { - constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB - - FileReader dictionary_file_reader{dictionary_path}; - FileReader segment_index_file_reader{segment_index_path}; +void DictionaryReader::read_new_entries() { + if (false == m_is_open) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } // Read dictionary header - uint64_t num_dictionary_entries{}; - if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)) { - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } + auto num_dictionary_entries = read_dictionary_header(*m_dictionary_file_reader); // Validate dictionary header if (num_dictionary_entries < m_entries.size()) { @@ -166,27 +182,15 @@ void DictionaryReader::read_new_entries( auto prev_num_dictionary_entries = m_entries.size(); m_entries.resize(num_dictionary_entries); -#if USE_PASSTHROUGH_COMPRESSION - streaming_compression::passthrough::Decompressor m_dictionary_decompressor; -#elif USE_ZSTD_COMPRESSION - streaming_compression::zstd::Decompressor dictionary_decompressor; -#else - static_assert(false, "Unsupported compression mode."); -#endif - dictionary_decompressor.open(dictionary_file_reader, cDecompressorFileReadBufferCapacity); for (size_t i = prev_num_dictionary_entries; i < num_dictionary_entries; ++i) { auto& entry = m_entries[i]; - entry.read_from_file(dictionary_decompressor); + entry.read_from_file(m_dictionary_decompressor); } - dictionary_decompressor.close(); } // Read segment index header - uint64_t num_segments{}; - if (false == segment_index_file_reader.read_numeric_value(num_segments, false)) { - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } + auto num_segments = read_segment_index_header(*m_segment_index_file_reader); // Validate segment index header if (num_segments < m_num_segments_read_from_index) { @@ -195,21 +199,9 @@ void DictionaryReader::read_new_entries( // Read new segments from index if (num_segments > m_num_segments_read_from_index) { -#if USE_PASSTHROUGH_COMPRESSION - streaming_compression::passthrough::Decompressor m_segment_index_decompressor; -#elif USE_ZSTD_COMPRESSION - streaming_compression::zstd::Decompressor segment_index_decompressor; -#else - static_assert(false, "Unsupported compression mode."); -#endif - segment_index_decompressor.open( - segment_index_file_reader, - cDecompressorFileReadBufferCapacity - ); for (size_t i = m_num_segments_read_from_index; i < num_segments; ++i) { - read_segment_ids(segment_index_decompressor); + read_segment_ids(); } - segment_index_decompressor.close(); } } @@ -277,17 +269,15 @@ void DictionaryReader::get_entries_matching_wildcar } template -void DictionaryReader::read_segment_ids( - streaming_compression::Decompressor& segment_index_decompressor -) { - segment_id_t segment_id{}; - segment_index_decompressor.read_numeric_value(segment_id, false); +void DictionaryReader::read_segment_ids() { + segment_id_t segment_id; + m_segment_index_decompressor.read_numeric_value(segment_id, false); - uint64_t num_ids{}; - segment_index_decompressor.read_numeric_value(num_ids, false); + uint64_t num_ids; + m_segment_index_decompressor.read_numeric_value(num_ids, false); for (uint64_t i = 0; i < num_ids; ++i) { - DictionaryIdType id{}; - segment_index_decompressor.read_numeric_value(id, false); + DictionaryIdType id; + m_segment_index_decompressor.read_numeric_value(id, false); if (id >= m_entries.size()) { throw OperationFailed(ErrorCode_Corrupt, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index b1f25a386..ccdfad5e6 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -4,10 +4,10 @@ #include #include #include -#include #include "ArrayBackedPosIntSet.hpp" #include "Defs.h" +#include "dictionary_utils.hpp" #include "FileWriter.hpp" #include "spdlog_with_specializations.hpp" #include "streaming_compression/passthrough/Compressor.hpp" @@ -240,10 +240,11 @@ void DictionaryWriter::open_and_preload( ; m_data_size += entry.get_data_size(); } - dictionary_decompressor.close(); m_next_id = num_dictionary_entries; + dictionary_decompressor.close(); + m_dictionary_file_writer.open( dictionary_path, FileWriter::OpenMode::CREATE_IF_NONEXISTENT_FOR_SEEKABLE_WRITING diff --git a/components/core/src/clp/LogTypeDictionaryWriter.cpp b/components/core/src/clp/LogTypeDictionaryWriter.cpp index e7170746b..4420b2789 100644 --- a/components/core/src/clp/LogTypeDictionaryWriter.cpp +++ b/components/core/src/clp/LogTypeDictionaryWriter.cpp @@ -1,5 +1,7 @@ #include "LogTypeDictionaryWriter.hpp" +#include "dictionary_utils.hpp" + using std::string; namespace clp { diff --git a/components/core/src/clp/VariableDictionaryWriter.cpp b/components/core/src/clp/VariableDictionaryWriter.cpp index 119ca0daf..77b063503 100644 --- a/components/core/src/clp/VariableDictionaryWriter.cpp +++ b/components/core/src/clp/VariableDictionaryWriter.cpp @@ -1,5 +1,6 @@ #include "VariableDictionaryWriter.hpp" +#include "dictionary_utils.hpp" #include "spdlog_with_specializations.hpp" namespace clp { diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index 3a598ac39..a0ca5e9d0 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -5,6 +5,8 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h + ../dictionary_utils.cpp + ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../EncodedVariableInterpreter.cpp diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index 9754aaa33..931bffeaf 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -9,6 +9,8 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h + ../dictionary_utils.cpp + ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../EncodedVariableInterpreter.cpp diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index f3a74ac78..53342f3a9 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -8,6 +8,8 @@ set( ../database_utils.cpp ../database_utils.hpp ../Defs.h + ../dictionary_utils.cpp + ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../DictionaryWriter.hpp diff --git a/components/core/src/clp/clp/decompression.cpp b/components/core/src/clp/clp/decompression.cpp index 5a57e6a2a..e24ad8413 100644 --- a/components/core/src/clp/clp/decompression.cpp +++ b/components/core/src/clp/clp/decompression.cpp @@ -73,6 +73,7 @@ bool decompress( } archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); archive_reader.decompress_empty_directories(command_line_args.get_output_dir()); diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp new file mode 100644 index 000000000..7eedd98eb --- /dev/null +++ b/components/core/src/clp/dictionary_utils.cpp @@ -0,0 +1,55 @@ +#include "dictionary_utils.hpp" + +#include +#include "FileReader.hpp" +#include + +using std::unique_ptr; +using std::make_unique; +using std::move; + +namespace clp { +void open_dictionary_for_reading( + std::string const& dictionary_path, + std::string const& segment_index_path, + size_t decompressor_file_read_buffer_capacity, + unique_ptr& dictionary_file_reader, + streaming_compression::Decompressor& dictionary_decompressor, + unique_ptr& segment_index_file_reader, + streaming_compression::Decompressor& segment_index_decompressor +) { + dictionary_file_reader = move(make_unique(dictionary_path)); + // Skip header + dictionary_file_reader->seek_from_begin(sizeof(uint64_t)); + // Open decompressor + dictionary_decompressor.open(*dictionary_file_reader, decompressor_file_read_buffer_capacity); + + segment_index_file_reader = move(make_unique(segment_index_path)); + // Skip header + segment_index_file_reader->seek_from_begin(sizeof(uint64_t)); + // Open decompressor + segment_index_decompressor.open( + *segment_index_file_reader, + decompressor_file_read_buffer_capacity + ); +} + +uint64_t read_dictionary_header(FileReader& file_reader) { + auto dictionary_file_reader_pos = file_reader.get_pos(); + file_reader.seek_from_begin(0); + uint64_t num_dictionary_entries; + file_reader.read_numeric_value(num_dictionary_entries, false); + file_reader.seek_from_begin(dictionary_file_reader_pos); + return num_dictionary_entries; +} + +uint64_t read_segment_index_header(FileReader& file_reader) { + // Read segment index header + auto segment_index_file_reader_pos = file_reader.get_pos(); + file_reader.seek_from_begin(0); + uint64_t num_segments; + file_reader.read_numeric_value(num_segments, false); + file_reader.seek_from_begin(segment_index_file_reader_pos); + return num_segments; +} +} // namespace clp diff --git a/components/core/src/clp/dictionary_utils.hpp b/components/core/src/clp/dictionary_utils.hpp new file mode 100644 index 000000000..883d49bd2 --- /dev/null +++ b/components/core/src/clp/dictionary_utils.hpp @@ -0,0 +1,26 @@ +#ifndef CLP_DICTIONARY_UTILS_HPP +#define CLP_DICTIONARY_UTILS_HPP + +#include +#include + +#include "FileReader.hpp" +#include "streaming_compression/Decompressor.hpp" + +namespace clp { +void open_dictionary_for_reading( + std::string const& dictionary_path, + std::string const& segment_index_path, + size_t decompressor_file_read_buffer_capacity, + std::unique_ptr& dictionary_file_reader, + streaming_compression::Decompressor& dictionary_decompressor, + std::unique_ptr& segment_index_file_reader, + streaming_compression::Decompressor& segment_index_decompressor +); + +uint64_t read_dictionary_header(FileReader& file_reader); + +uint64_t read_segment_index_header(FileReader& file_reader); +} // namespace clp + +#endif // CLP_DICTIONARY_UTILS_HPP diff --git a/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt b/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt index 139b5e363..fd62a39fb 100644 --- a/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt +++ b/components/core/src/clp/make_dictionaries_readable/CMakeLists.txt @@ -1,5 +1,7 @@ set( MAKE_DICTIONARIES_READABLE_SOURCES + ../dictionary_utils.cpp + ../dictionary_utils.hpp ../DictionaryEntry.hpp ../DictionaryReader.hpp ../FileDescriptor.cpp diff --git a/components/core/src/clp/streaming_archive/reader/Archive.cpp b/components/core/src/clp/streaming_archive/reader/Archive.cpp index 541b25595..141e5fce5 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.cpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.cpp @@ -119,6 +119,11 @@ void Archive::close() { m_path.clear(); } +void Archive::refresh_dictionaries() { + m_logtype_dictionary.read_new_entries(); + m_var_dictionary.read_new_entries(); +} + ErrorCode Archive::open_file(File& file, MetadataDB::FileIterator const& file_metadata_ix) { return file.open_me(m_logtype_dictionary, file_metadata_ix, m_segment_manager); } diff --git a/components/core/src/clp/streaming_archive/reader/Archive.hpp b/components/core/src/clp/streaming_archive/reader/Archive.hpp index e66561433..c724be476 100644 --- a/components/core/src/clp/streaming_archive/reader/Archive.hpp +++ b/components/core/src/clp/streaming_archive/reader/Archive.hpp @@ -48,6 +48,7 @@ class Archive { * Reads any new entries added to the dictionaries * @throw Same as LogTypeDictionary::read_from_file and VariableDictionary::read_from_file */ + void refresh_dictionaries(); LogTypeDictionaryReader const& get_logtype_dictionary() const; VariableDictionaryReader const& get_var_dictionary() const; From a8d0747cac6382b4316a81e2fab4d8da24da0e67 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 12 Aug 2024 18:16:20 -0400 Subject: [PATCH 20/62] fix --- components/core/src/clp/dictionary_utils.cpp | 5 +++-- components/core/src/clp/dictionary_utils.hpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp index 7eedd98eb..1f8477745 100644 --- a/components/core/src/clp/dictionary_utils.cpp +++ b/components/core/src/clp/dictionary_utils.cpp @@ -1,12 +1,13 @@ #include "dictionary_utils.hpp" #include -#include "FileReader.hpp" #include -using std::unique_ptr; +#include "FileReader.hpp" + using std::make_unique; using std::move; +using std::unique_ptr; namespace clp { void open_dictionary_for_reading( diff --git a/components/core/src/clp/dictionary_utils.hpp b/components/core/src/clp/dictionary_utils.hpp index 883d49bd2..996e35e80 100644 --- a/components/core/src/clp/dictionary_utils.hpp +++ b/components/core/src/clp/dictionary_utils.hpp @@ -1,8 +1,8 @@ #ifndef CLP_DICTIONARY_UTILS_HPP #define CLP_DICTIONARY_UTILS_HPP -#include #include +#include #include "FileReader.hpp" #include "streaming_compression/Decompressor.hpp" From ff0b9d82618f2fc89807a0551af7cb2a46b38f41 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:27:31 -0400 Subject: [PATCH 21/62] Missing fix --- components/core/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index f0cd327ef..70090ba30 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -298,6 +298,8 @@ set(SOURCE_FILES_unitTest src/clp/database_utils.cpp src/clp/database_utils.hpp src/clp/Defs.h + src/clp/dictionary_utils.cpp + src/clp/dictionary_utils.hpp src/clp/DictionaryEntry.hpp src/clp/DictionaryReader.hpp src/clp/DictionaryWriter.hpp From b3f27950f2538a79a51e0f4ae0ded3fa4d030916 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:42:11 -0400 Subject: [PATCH 22/62] Fixes --- components/core/src/clp/DictionaryReader.hpp | 21 ++++++----- components/core/src/clp/clg/clg.cpp | 25 +++++++++++++ components/core/src/clp/clo/clo.cpp | 1 + components/core/src/clp/clp/decompression.cpp | 1 + components/core/src/clp/dictionary_utils.cpp | 36 ++----------------- components/core/src/clp/dictionary_utils.hpp | 13 ------- .../tests/test-EncodedVariableInterpreter.cpp | 1 + 7 files changed, 43 insertions(+), 55 deletions(-) diff --git a/components/core/src/clp/DictionaryReader.hpp b/components/core/src/clp/DictionaryReader.hpp index 5b8a007a1..a1f8e7965 100644 --- a/components/core/src/clp/DictionaryReader.hpp +++ b/components/core/src/clp/DictionaryReader.hpp @@ -133,14 +133,19 @@ void DictionaryReader::open( constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB - open_dictionary_for_reading( - dictionary_path, - segment_index_path, - cDecompressorFileReadBufferCapacity, - m_dictionary_file_reader, - m_dictionary_decompressor, - m_segment_index_file_reader, - m_segment_index_decompressor + m_dictionary_file_reader = move(make_unique(dictionary_path)); + // Skip header + m_dictionary_file_reader->seek_from_begin(sizeof(uint64_t)); + // Open decompressor + m_dictionary_decompressor.open(*m_dictionary_file_reader, cDecompressorFileReadBufferCapacity); + + m_segment_index_file_reader = move(make_unique(segment_index_path)); + // Skip header + m_segment_index_file_reader->seek_from_begin(sizeof(uint64_t)); + // Open decompressor + m_segment_index_decompressor.open( + *m_segment_index_file_reader, + cDecompressorFileReadBufferCapacity ); m_is_open = true; diff --git a/components/core/src/clp/clg/clg.cpp b/components/core/src/clp/clg/clg.cpp index 9bb787a4c..dd35a3283 100644 --- a/components/core/src/clp/clg/clg.cpp +++ b/components/core/src/clp/clg/clg.cpp @@ -173,6 +173,31 @@ static bool open_archive(string const& archive_path, Archive& archive_reader) { } } + try { + archive_reader.refresh_dictionaries(); + } catch (TraceableException& e) { + error_code = e.get_error_code(); + if (ErrorCode_errno == error_code) { + SPDLOG_ERROR( + "Reading dictionaries failed: {}:{} {}, errno={}", + e.get_filename(), + e.get_line_number(), + e.what(), + errno + ); + return false; + } else { + SPDLOG_ERROR( + "Reading dictionaries failed: {}:{} {}, error_code={}", + e.get_filename(), + e.get_line_number(), + e.what(), + error_code + ); + return false; + } + } + return true; } diff --git a/components/core/src/clp/clo/clo.cpp b/components/core/src/clp/clo/clo.cpp index 5ddab50ea..fcee710a3 100644 --- a/components/core/src/clp/clo/clo.cpp +++ b/components/core/src/clp/clo/clo.cpp @@ -140,6 +140,7 @@ bool extract_ir(CommandLineArguments const& command_line_args) { Archive archive_reader; archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); auto const& file_split_id = command_line_args.get_file_split_id(); auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_split_id(file_split_id); diff --git a/components/core/src/clp/clp/decompression.cpp b/components/core/src/clp/clp/decompression.cpp index e24ad8413..11115dbe0 100644 --- a/components/core/src/clp/clp/decompression.cpp +++ b/components/core/src/clp/clp/decompression.cpp @@ -111,6 +111,7 @@ bool decompress( archive_ix->get_id(archive_id); auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); // Decompress all splits with the given path auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_path(file_path); diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp index 1f8477745..a81a1591c 100644 --- a/components/core/src/clp/dictionary_utils.cpp +++ b/components/core/src/clp/dictionary_utils.cpp @@ -1,44 +1,12 @@ #include "dictionary_utils.hpp" -#include -#include - #include "FileReader.hpp" -using std::make_unique; -using std::move; -using std::unique_ptr; - namespace clp { -void open_dictionary_for_reading( - std::string const& dictionary_path, - std::string const& segment_index_path, - size_t decompressor_file_read_buffer_capacity, - unique_ptr& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - unique_ptr& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor -) { - dictionary_file_reader = move(make_unique(dictionary_path)); - // Skip header - dictionary_file_reader->seek_from_begin(sizeof(uint64_t)); - // Open decompressor - dictionary_decompressor.open(*dictionary_file_reader, decompressor_file_read_buffer_capacity); - - segment_index_file_reader = move(make_unique(segment_index_path)); - // Skip header - segment_index_file_reader->seek_from_begin(sizeof(uint64_t)); - // Open decompressor - segment_index_decompressor.open( - *segment_index_file_reader, - decompressor_file_read_buffer_capacity - ); -} - uint64_t read_dictionary_header(FileReader& file_reader) { auto dictionary_file_reader_pos = file_reader.get_pos(); file_reader.seek_from_begin(0); - uint64_t num_dictionary_entries; + uint64_t num_dictionary_entries{}; file_reader.read_numeric_value(num_dictionary_entries, false); file_reader.seek_from_begin(dictionary_file_reader_pos); return num_dictionary_entries; @@ -48,7 +16,7 @@ uint64_t read_segment_index_header(FileReader& file_reader) { // Read segment index header auto segment_index_file_reader_pos = file_reader.get_pos(); file_reader.seek_from_begin(0); - uint64_t num_segments; + uint64_t num_segments{}; file_reader.read_numeric_value(num_segments, false); file_reader.seek_from_begin(segment_index_file_reader_pos); return num_segments; diff --git a/components/core/src/clp/dictionary_utils.hpp b/components/core/src/clp/dictionary_utils.hpp index 996e35e80..14509a4e9 100644 --- a/components/core/src/clp/dictionary_utils.hpp +++ b/components/core/src/clp/dictionary_utils.hpp @@ -1,23 +1,10 @@ #ifndef CLP_DICTIONARY_UTILS_HPP #define CLP_DICTIONARY_UTILS_HPP -#include -#include - #include "FileReader.hpp" #include "streaming_compression/Decompressor.hpp" namespace clp { -void open_dictionary_for_reading( - std::string const& dictionary_path, - std::string const& segment_index_path, - size_t decompressor_file_read_buffer_capacity, - std::unique_ptr& dictionary_file_reader, - streaming_compression::Decompressor& dictionary_decompressor, - std::unique_ptr& segment_index_file_reader, - streaming_compression::Decompressor& segment_index_decompressor -); - uint64_t read_dictionary_header(FileReader& file_reader); uint64_t read_segment_index_header(FileReader& file_reader); diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 5600b2ccf..6ab0687a9 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -439,6 +439,7 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { // Open reader clp::VariableDictionaryReader var_dict_reader; var_dict_reader.open(cVarDictPath, cVarSegmentIndexPath); + var_dict_reader.read_new_entries(); // Test searching string search_logtype = "here is a string with a small int "; From 4d0d80bb03aaaf0be65e8ce0399856ac4c55d444 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:18:22 -0400 Subject: [PATCH 23/62] Revert unintended changes --- components/core/src/clp/DictionaryWriter.hpp | 2 +- components/core/src/clp/clo/clo.cpp | 1 + components/core/src/clp/clp/decompression.cpp | 2 ++ .../make_dictionaries_readable/make-dictionaries-readable.cpp | 2 ++ 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index ccdfad5e6..3405ee46e 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -214,7 +214,7 @@ void DictionaryWriter::open_and_preload( constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024; // 64 KB // Read dictionary header - uint64_t num_dictionary_entries{}; + uint64_t num_dictionary_entries{0}; if (false == dictionary_file_reader.read_numeric_value(num_dictionary_entries, false)) { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } diff --git a/components/core/src/clp/clo/clo.cpp b/components/core/src/clp/clo/clo.cpp index fcee710a3..f29df0306 100644 --- a/components/core/src/clp/clo/clo.cpp +++ b/components/core/src/clp/clo/clo.cpp @@ -482,6 +482,7 @@ static bool search_archive( Archive archive_reader; archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); auto search_begin_ts = command_line_args.get_search_begin_ts(); auto search_end_ts = command_line_args.get_search_end_ts(); diff --git a/components/core/src/clp/clp/decompression.cpp b/components/core/src/clp/clp/decompression.cpp index 11115dbe0..6b87f6777 100644 --- a/components/core/src/clp/clp/decompression.cpp +++ b/components/core/src/clp/clp/decompression.cpp @@ -145,6 +145,7 @@ bool decompress( archive_ix->get_id(archive_id); auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); // Decompress files auto file_metadata_ix_ptr = archive_reader.get_file_iterator(); @@ -269,6 +270,7 @@ bool decompress_to_ir(CommandLineArguments& command_line_args) { streaming_archive::reader::Archive archive_reader; auto archive_path = archives_dir / archive_id; archive_reader.open(archive_path.string()); + archive_reader.refresh_dictionaries(); auto file_metadata_ix_ptr = archive_reader.get_file_iterator_by_split_id(file_split_id); if (false == file_metadata_ix_ptr->has_next()) { diff --git a/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp index f40a0b110..f35932fc3 100644 --- a/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/clp/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -55,6 +55,7 @@ int main(int argc, char const* argv[]) { / clp::streaming_archive::cLogTypeSegmentIndexFilename; clp::LogTypeDictionaryReader logtype_dict; logtype_dict.open(logtype_dict_path.string(), logtype_segment_index_path.string()); + logtype_dict.read_new_entries(); // Write readable dictionary auto readable_logtype_dict_path = boost::filesystem::path(command_line_args.get_output_dir()) @@ -138,6 +139,7 @@ int main(int argc, char const* argv[]) { / clp::streaming_archive::cVarSegmentIndexFilename; clp::VariableDictionaryReader var_dict; var_dict.open(var_dict_path.string(), var_segment_index_path.string()); + var_dict.read_new_entries(); // Write readable dictionary auto readable_var_dict_path = boost::filesystem::path(command_line_args.get_output_dir()) From 70c8342510c7e4a1fa68f25747c797672edb7746 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:08:24 -0400 Subject: [PATCH 24/62] Fix one weird issue --- components/core/src/clp/DictionaryWriter.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/components/core/src/clp/DictionaryWriter.hpp b/components/core/src/clp/DictionaryWriter.hpp index 3405ee46e..0499937be 100644 --- a/components/core/src/clp/DictionaryWriter.hpp +++ b/components/core/src/clp/DictionaryWriter.hpp @@ -237,7 +237,6 @@ void DictionaryWriter::open_and_preload( } m_value_to_id[str_value] = entry.get_id(); - ; m_data_size += entry.get_data_size(); } From 83ee77eef62958f093ad1c00149beacfd9f3edc4 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:24:07 -0400 Subject: [PATCH 25/62] small fix --- components/core/src/clp/dictionary_utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/dictionary_utils.cpp b/components/core/src/clp/dictionary_utils.cpp index a81a1591c..d8bd20a9a 100644 --- a/components/core/src/clp/dictionary_utils.cpp +++ b/components/core/src/clp/dictionary_utils.cpp @@ -6,7 +6,7 @@ namespace clp { uint64_t read_dictionary_header(FileReader& file_reader) { auto dictionary_file_reader_pos = file_reader.get_pos(); file_reader.seek_from_begin(0); - uint64_t num_dictionary_entries{}; + uint64_t num_dictionary_entries{0}; file_reader.read_numeric_value(num_dictionary_entries, false); file_reader.seek_from_begin(dictionary_file_reader_pos); return num_dictionary_entries; @@ -16,7 +16,7 @@ uint64_t read_segment_index_header(FileReader& file_reader) { // Read segment index header auto segment_index_file_reader_pos = file_reader.get_pos(); file_reader.seek_from_begin(0); - uint64_t num_segments{}; + uint64_t num_segments{0}; file_reader.read_numeric_value(num_segments, false); file_reader.seek_from_begin(segment_index_file_reader_pos); return num_segments; From 59db88b686ca317a4b82893ca3b64421b4066eb7 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:21:16 -0400 Subject: [PATCH 26/62] Improvement --- .../core/src/clp/BufferedFileReader.cpp | 1 - .../core/src/clp/BufferedFileReader.hpp | 16 ++++-------- components/core/src/clp/FileDescriptor.cpp | 7 ------ components/core/src/clp/FileDescriptor.hpp | 6 ++--- components/core/src/clp/SysFileReader.cpp | 12 ++------- components/core/src/clp/SysFileReader.hpp | 25 +++++++++++++------ 6 files changed, 26 insertions(+), 41 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 74275a01a..347da3e3c 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -6,7 +6,6 @@ #include -#include "../glt/streaming_archive/writer/Archive.hpp" #include "math_utils.hpp" using std::string; diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 56b0c2b08..ed17211b3 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -1,23 +1,22 @@ #ifndef CLP_BUFFEREDFILEREADER_HPP #define CLP_BUFFEREDFILEREADER_HPP -#include #include #include #include #include #include "BufferReader.hpp" -#include "Defs.h" #include "ErrorCode.hpp" #include "ReaderInterface.hpp" #include "TraceableException.hpp" namespace clp { /** - * Class for performing buffered (in memory) reads from an on-disk file with control over when and - * how much data is buffered. This allows us to support use cases where we want to perform unordered - * reads from files which only support sequential access (e.g. files from block storage like S3). + * Class for performing buffered (in memory) reads from another ReaderInterface with control over + * when and how much data is buffered. This allows us to support use cases where we want to perform + * unordered reads from input which only support sequential access (e.g. files from block storage + * like S3). * * To control how much data is buffered, we allow callers to set a checkpoint such that all reads * and seeks past the checkpoint will be buffered until the checkpoint is cleared. This allows @@ -25,13 +24,10 @@ namespace clp { * When no checkpoint is set, we maintain a fixed-size buffer. * * NOTE 1: Unless otherwise noted, the "file position" mentioned in docstrings is the position in - * the buffered file, not the position in the on-disk file. + * the buffered file, not the position in the original input file. * * NOTE 2: This class restricts the buffer size to a multiple of the page size and we avoid reading * anything less than a page to avoid multiple page faults. - * - * NOTE 3: Although the FILE stream interface provided by glibc also performs buffered reads, it - * does not allow us to control the buffering. */ class BufferedFileReader : public ReaderInterface { public: @@ -227,8 +223,6 @@ class BufferedFileReader : public ReaderInterface { // Variables size_t m_file_pos{0}; - - // ReaderInterfaceSpecific std::unique_ptr m_file_reader; // Buffer specific data diff --git a/components/core/src/clp/FileDescriptor.cpp b/components/core/src/clp/FileDescriptor.cpp index 31eb906a9..2e17bfe05 100644 --- a/components/core/src/clp/FileDescriptor.cpp +++ b/components/core/src/clp/FileDescriptor.cpp @@ -47,13 +47,6 @@ FileDescriptor::~FileDescriptor() { } } -FileDescriptor::FileDescriptor(FileDescriptor&& other) - : m_fd{other.m_fd}, - m_open_mode{other.m_open_mode}, - m_close_failure_callback{other.m_close_failure_callback} { - other.m_fd = -1; -} - auto FileDescriptor::get_size() const -> size_t { struct stat stat_result {}; diff --git a/components/core/src/clp/FileDescriptor.hpp b/components/core/src/clp/FileDescriptor.hpp index c1784cf33..a704326bf 100644 --- a/components/core/src/clp/FileDescriptor.hpp +++ b/components/core/src/clp/FileDescriptor.hpp @@ -62,11 +62,9 @@ class FileDescriptor { // Destructor ~FileDescriptor(); - // Move constructor - FileDescriptor(FileDescriptor&&); - - // Disable copy constructors and move/copy assignment operators + // Disable copy/move constructors/assignment operators FileDescriptor(FileDescriptor const&) = delete; + FileDescriptor(FileDescriptor&&) = delete; auto operator=(FileDescriptor const&) -> FileDescriptor& = delete; auto operator=(FileDescriptor&&) -> FileDescriptor& = delete; diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 6c97e3957..67df6e93d 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -5,18 +5,11 @@ #include #include -#include -#include - #include using std::string; namespace clp { -SysFileReader::SysFileReader(SysFileReader&& other) - : m_fd{std::move(other.m_fd)}, - m_path{std::move(other.m_path)} {} - auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode { if (nullptr == buf) { @@ -58,7 +51,7 @@ auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); - if ((off_t)-1 == pos) { + if (static_cast(-1) == pos) { return ErrorCode_errno; } @@ -66,8 +59,7 @@ auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { } auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - auto return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); - if (0 != return_value) { + if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { return ErrorCode_errno; } return ErrorCode_Success; diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index a60f12c58..0026d75eb 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -3,7 +3,9 @@ #include +#include #include +#include #include "ErrorCode.hpp" #include "FileDescriptor.hpp" @@ -11,6 +13,15 @@ #include "TraceableException.hpp" namespace clp { +/** + * Class for performing reads from an on-disk file directly using C style system call. + * Unlike reader classes using FILE stream interface, This class operates on raw fd and does not + * internally buffer any data. Instead, the user of this class is expected to buffer and read the + * data efficiently. + * + * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use + * FileReader instead. + */ class SysFileReader : public ReaderInterface { public: // Types @@ -24,16 +35,14 @@ class SysFileReader : public ReaderInterface { char const* what() const noexcept override { return "FileReader operation failed"; } }; - SysFileReader(std::string_view const& path) - : m_fd{path, FileDescriptor::OpenMode::ReadOnly}, - m_path{path} {} + explicit SysFileReader(std::string path) + : m_path{std::move(path)}, + m_fd{m_path, FileDescriptor::OpenMode::ReadOnly} {} - // Explicitly disable copy constructor and assignment operator + // Explicitly disable copy and move constructor and assignment operator SysFileReader(SysFileReader const&) = delete; SysFileReader& operator=(SysFileReader const&) = delete; - - // Move constructor and assignment operator - SysFileReader(SysFileReader&&); + SysFileReader(SysFileReader&&) = delete; SysFileReader& operator=(SysFileReader&&) = delete; // Methods implementing the ReaderInterface @@ -76,8 +85,8 @@ class SysFileReader : public ReaderInterface { [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; private: - FileDescriptor m_fd; std::string m_path; + FileDescriptor m_fd; }; } // namespace clp From c6c8c9be2ca1a62ad7926f678df1b917a98a2f56 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:37:34 -0400 Subject: [PATCH 27/62] Add initial class and Unittest --- components/core/CMakeLists.txt | 3 + components/core/src/clp/SysFileReader.cpp | 67 ++++++++++++++++ components/core/src/clp/SysFileReader.hpp | 95 +++++++++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 components/core/src/clp/SysFileReader.cpp create mode 100644 components/core/src/clp/SysFileReader.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 4d7eab5c4..b1bc19495 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -410,6 +410,8 @@ set(SOURCE_FILES_unitTest src/clp/SQLiteDB.hpp src/clp/SQLitePreparedStatement.cpp src/clp/SQLitePreparedStatement.hpp + src/clp/SysFileReader.cpp + src/clp/SysFileReader.hpp src/clp/Stopwatch.cpp src/clp/Stopwatch.hpp src/clp/streaming_archive/ArchiveMetadata.cpp @@ -495,6 +497,7 @@ set(SOURCE_FILES_unitTest tests/test-Stopwatch.cpp tests/test-StreamingCompression.cpp tests/test-string_utils.cpp + tests/test-SysFileReader.cpp tests/test-TimestampPattern.cpp tests/test-utf8_utils.cpp tests/test-Utils.cpp diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp new file mode 100644 index 000000000..3d6776bc7 --- /dev/null +++ b/components/core/src/clp/SysFileReader.cpp @@ -0,0 +1,67 @@ +#include "SysFileReader.hpp" + +#include +#include +#include +#include + +#include + +using std::string; + +namespace clp { + auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) + -> ErrorCode { + if (nullptr == buf) { + return ErrorCode_BadParam; + } + + num_bytes_read = 0; + while (true) { + auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); + if (0 == bytes_read) { + break; + } + if (bytes_read < 0) { + return ErrorCode_errno; + } + + buf += bytes_read; + num_bytes_read += bytes_read; + num_bytes_to_read -= bytes_read; + if (num_bytes_read == num_bytes_to_read) { + return ErrorCode_Success; + } + } + if (0 == num_bytes_read) { + return ErrorCode_EndOfFile; + } + return ErrorCode_Success; + } + + auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { + if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); + -1 == offset) + { + return ErrorCode_errno; + } + + return ErrorCode_Success; + } + + auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { + pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); + if (static_cast(-1) == pos) { + return ErrorCode_errno; + } + + return ErrorCode_Success; + } + + auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { + if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { + return ErrorCode_errno; + } + return ErrorCode_Success; + } +} // namespace clp diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp new file mode 100644 index 000000000..1dca2d747 --- /dev/null +++ b/components/core/src/clp/SysFileReader.hpp @@ -0,0 +1,95 @@ +#ifndef CLP_SYSFILEREADER_HPP +#define CLP_SYSFILEREADER_HPP + +#include + +#include +#include +#include + +#include "ErrorCode.hpp" +#include "FileDescriptor.hpp" +#include "ReaderInterface.hpp" +#include "TraceableException.hpp" + +namespace clp { +/** + * Class for performing reads from an on-disk file directly using C style system call. + * Unlike reader classes using FILE stream interface, This class operates on raw fd and does not + * internally buffer any data. Instead, the user of this class is expected to buffer and read the + * data efficiently. + * + * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use + * FileReader instead. + */ +class SysFileReader : public ReaderInterface { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + + // Methods + char const* what() const noexcept override { return "FileReader operation failed"; } + }; + + explicit SysFileReader(std::string path) + : m_path{std::move(path)}, + m_fd{m_path, FileDescriptor::OpenMode::ReadOnly} {} + + // Explicitly disable copy constructor and assignment operator + SysFileReader(SysFileReader const&) = delete; + SysFileReader& operator=(SysFileReader const&) = delete; + + // Explicitly disable move constructor and assignment operator + SysFileReader(SysFileReader&&) = delete; + SysFileReader& operator=(SysFileReader&&) = delete; + + // Methods implementing the ReaderInterface + /** + * Tries to get the current position of the read head in the file + * @param pos Position of the read head in the file + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; + /** + * Tries to seek from the beginning of the file to the given position + * @param pos + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; + + /** + * Tries to read up to a given number of bytes from the file + * @param buf + * @param num_bytes_to_read The number of bytes to try and read + * @param num_bytes_read The actual number of bytes read + * @return ErrorCode_BadParam if buf is invalid + * @return ErrorCode_errno on error + * @return ErrorCode_EndOfFile on EOF + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto + try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; + + [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } + + /** + * Tries to stat the current file + * @param stat_buffer + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; + +private: + std::string m_path; + FileDescriptor m_fd; +}; +} // namespace clp + +#endif // CLP_SYSFILEREADER_HPP From d5db2a6abdceb56610b4a3180e91caf2f9e7c937 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:46:36 -0400 Subject: [PATCH 28/62] Small fix --- components/core/src/clp/SysFileReader.cpp | 89 +++++++++++------------ 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 3d6776bc7..4d014ddb2 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -1,67 +1,64 @@ #include "SysFileReader.hpp" -#include #include #include #include -#include - -using std::string; +#include "ErrorCode.hpp" namespace clp { - auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) - -> ErrorCode { - if (nullptr == buf) { - return ErrorCode_BadParam; - } - - num_bytes_read = 0; - while (true) { - auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); - if (0 == bytes_read) { - break; - } - if (bytes_read < 0) { - return ErrorCode_errno; - } +auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) + -> ErrorCode { + if (nullptr == buf) { + return ErrorCode_BadParam; + } - buf += bytes_read; - num_bytes_read += bytes_read; - num_bytes_to_read -= bytes_read; - if (num_bytes_read == num_bytes_to_read) { - return ErrorCode_Success; - } + num_bytes_read = 0; + while (true) { + auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); + if (0 == bytes_read) { + break; } - if (0 == num_bytes_read) { - return ErrorCode_EndOfFile; + if (bytes_read < 0) { + return ErrorCode_errno; } - return ErrorCode_Success; - } - auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { - if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); - -1 == offset) - { - return ErrorCode_errno; + buf += bytes_read; + num_bytes_read += bytes_read; + num_bytes_to_read -= bytes_read; + if (num_bytes_read == num_bytes_to_read) { + return ErrorCode_Success; } + } + if (0 == num_bytes_read) { + return ErrorCode_EndOfFile; + } + return ErrorCode_Success; +} - return ErrorCode_Success; +auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { + if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); + -1 == offset) + { + return ErrorCode_errno; } - auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { - pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); - if (static_cast(-1) == pos) { - return ErrorCode_errno; - } + return ErrorCode_Success; +} - return ErrorCode_Success; +auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { + pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); + if (static_cast(-1) == pos) { + return ErrorCode_errno; } - auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { - return ErrorCode_errno; - } - return ErrorCode_Success; + return ErrorCode_Success; +} + +auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { + if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { + return ErrorCode_errno; } + return ErrorCode_Success; +} } // namespace clp From 39c9bb359a1497c88a2470868b4c931f12227d09 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Wed, 14 Aug 2024 22:31:40 -0400 Subject: [PATCH 29/62] Add missing unit test file --- components/core/tests/test-SysFileReader.cpp | 82 ++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 components/core/tests/test-SysFileReader.cpp diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-SysFileReader.cpp new file mode 100644 index 000000000..80fb5b07a --- /dev/null +++ b/components/core/tests/test-SysFileReader.cpp @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../src/clp/ErrorCode.hpp" +#include "../src/clp/FileReader.hpp" +#include "../src/clp/ReaderInterface.hpp" +#include "../src/clp/SysFileReader.hpp" + +namespace { +// Reused code starts +constexpr size_t cDefaultReaderBufferSize{1024}; + +[[nodiscard]] auto get_test_input_local_path() -> std::string; + +[[nodiscard]] auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path; + +/** + * @param reader + * @param read_buf_size The size of the buffer to use for individual reads from the reader. + * @return All data read from the given reader. + */ +auto get_content(clp::ReaderInterface& reader, size_t read_buf_size = cDefaultReaderBufferSize) + -> std::vector; + +auto get_test_input_local_path() -> std::string { + std::filesystem::path const current_file_path{__FILE__}; + auto const tests_dir{current_file_path.parent_path()}; + return (tests_dir / get_test_input_path_relative_to_tests_dir()).string(); +} + +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path { + return std::filesystem::path{"test_log_files"} / "log.txt"; +} + +auto get_content(clp::ReaderInterface& reader, size_t read_buf_size) -> std::vector { + std::vector buf; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + auto const read_buf{std::make_unique(read_buf_size)}; + for (bool has_more_content{true}; has_more_content;) { + size_t num_bytes_read{}; + has_more_content = reader.read(read_buf.get(), read_buf_size, num_bytes_read); + std::string_view const view{read_buf.get(), num_bytes_read}; + buf.insert(buf.cend(), view.cbegin(), view.cend()); + } + return buf; +} +} // namespace + +// Reused code ends + +TEST_CASE("sys_file_reader_basic", "[SysFileReader]") { + clp::FileReader ref_reader{get_test_input_local_path()}; + auto const expected{get_content(ref_reader)}; + + clp::SysFileReader reader{get_test_input_local_path()}; + auto const actual{get_content(reader)}; + REQUIRE((actual == expected)); +} + +TEST_CASE("sys_file_reader_with_offset_and_seek", "[SysFileReader]") { + constexpr size_t cOffset{319}; + + clp::FileReader ref_reader{get_test_input_local_path()}; + ref_reader.seek_from_begin(cOffset); + auto const expected{get_content(ref_reader)}; + auto const ref_end_pos{ref_reader.get_pos()}; + + clp::SysFileReader reader(get_test_input_local_path()); + reader.seek_from_begin(cOffset); + auto const actual{get_content(reader)}; + auto const actual_end_pos{reader.get_pos()}; + + REQUIRE(actual_end_pos == ref_end_pos); + REQUIRE((actual == expected)); +} From 8909eabf813e728f519ee3f53adf14031c09bf75 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 15 Aug 2024 12:59:43 -0400 Subject: [PATCH 30/62] Fix clang tidy issues --- components/core/src/clp/ReaderInterface.hpp | 3 +++ components/core/src/clp/SysFileReader.cpp | 4 +++- components/core/src/clp/SysFileReader.hpp | 12 +++++++++--- components/core/tests/test-SysFileReader.cpp | 3 +-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp/ReaderInterface.hpp b/components/core/src/clp/ReaderInterface.hpp index 39f914c2d..0a33598ce 100644 --- a/components/core/src/clp/ReaderInterface.hpp +++ b/components/core/src/clp/ReaderInterface.hpp @@ -22,6 +22,9 @@ class ReaderInterface { char const* what() const noexcept override { return "ReaderInterface operation failed"; } }; + // Destructor + virtual ~ReaderInterface() = default; + // Methods virtual ErrorCode try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) = 0; virtual ErrorCode try_seek_from_begin(size_t pos) = 0; diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 4d014ddb2..cf098012d 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -1,9 +1,11 @@ #include "SysFileReader.hpp" #include -#include #include +#include +#include + #include "ErrorCode.hpp" namespace clp { diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index 1dca2d747..0a7659195 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -32,7 +33,9 @@ class SysFileReader : public ReaderInterface { : TraceableException(error_code, filename, line_number) {} // Methods - char const* what() const noexcept override { return "FileReader operation failed"; } + [[nodiscard]] auto what() const noexcept -> char const* override { + return "FileReader operation failed"; + } }; explicit SysFileReader(std::string path) @@ -41,11 +44,14 @@ class SysFileReader : public ReaderInterface { // Explicitly disable copy constructor and assignment operator SysFileReader(SysFileReader const&) = delete; - SysFileReader& operator=(SysFileReader const&) = delete; + auto operator=(SysFileReader const&) -> SysFileReader& = delete; // Explicitly disable move constructor and assignment operator SysFileReader(SysFileReader&&) = delete; - SysFileReader& operator=(SysFileReader&&) = delete; + auto operator=(SysFileReader&&) -> SysFileReader& = delete; + + // Destructor + ~SysFileReader() override = default; // Methods implementing the ReaderInterface /** diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-SysFileReader.cpp index 80fb5b07a..6ee9e2da7 100644 --- a/components/core/tests/test-SysFileReader.cpp +++ b/components/core/tests/test-SysFileReader.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -77,6 +76,6 @@ TEST_CASE("sys_file_reader_with_offset_and_seek", "[SysFileReader]") { auto const actual{get_content(reader)}; auto const actual_end_pos{reader.get_pos()}; - REQUIRE(actual_end_pos == ref_end_pos); + REQUIRE((actual_end_pos == ref_end_pos)); REQUIRE((actual == expected)); } From c5cb60311584d6214cb03b117cb0c2c2b60f560b Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:20:01 -0400 Subject: [PATCH 31/62] suppress pointer warning for now --- components/core/src/clp/SysFileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index cf098012d..9b657e9b4 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -24,7 +24,7 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by if (bytes_read < 0) { return ErrorCode_errno; } - + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) buf += bytes_read; num_bytes_read += bytes_read; num_bytes_to_read -= bytes_read; From cfcd0f79a4d80bf8b93f4520825ac3eff8a81a7f Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:22:30 -0400 Subject: [PATCH 32/62] Apply suggestions from code review Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- components/core/src/clp/SysFileReader.cpp | 8 ++++---- components/core/src/clp/SysFileReader.hpp | 10 +++++----- components/core/tests/test-SysFileReader.cpp | 1 - 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 9b657e9b4..8a7a2c0f9 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -40,7 +40,7 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); - -1 == offset) + static_cast(-1) == offset) { return ErrorCode_errno; } @@ -49,11 +49,11 @@ auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { } auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { - pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); - if (static_cast(-1) == pos) { + auto const curr_offset = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); + if (static_cast(-1) == curr_offset) { return ErrorCode_errno; } - + pos = static_cast(curr_offset); return ErrorCode_Success; } diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index 0a7659195..c47133534 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -16,12 +16,12 @@ namespace clp { /** * Class for performing reads from an on-disk file directly using C style system call. - * Unlike reader classes using FILE stream interface, This class operates on raw fd and does not - * internally buffer any data. Instead, the user of this class is expected to buffer and read the - * data efficiently. + * Unlike reader classes using `FILE` stream interface, This class operates on raw file descriptor + * and does not internally buffer any data. Instead, the user of this class is expected to buffer + * and read the data efficiently. * * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use - * FileReader instead. + * `FileReader` instead. */ class SysFileReader : public ReaderInterface { public: @@ -81,7 +81,7 @@ class SysFileReader : public ReaderInterface { */ [[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; - + // Methods [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } /** diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-SysFileReader.cpp index 6ee9e2da7..e9bef8dcd 100644 --- a/components/core/tests/test-SysFileReader.cpp +++ b/components/core/tests/test-SysFileReader.cpp @@ -7,7 +7,6 @@ #include -#include "../src/clp/ErrorCode.hpp" #include "../src/clp/FileReader.hpp" #include "../src/clp/ReaderInterface.hpp" #include "../src/clp/SysFileReader.hpp" From 948b31596066281a58a06519da149eb5b91a8c13 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Fri, 16 Aug 2024 00:13:46 -0400 Subject: [PATCH 33/62] fixes --- components/core/src/clp/FileDescriptor.cpp | 7 +++++ components/core/src/clp/FileDescriptor.hpp | 9 ++++++ components/core/src/clp/SysFileReader.cpp | 9 +----- components/core/src/clp/SysFileReader.hpp | 33 ++++++++++++---------- 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/components/core/src/clp/FileDescriptor.cpp b/components/core/src/clp/FileDescriptor.cpp index 2e17bfe05..e40357651 100644 --- a/components/core/src/clp/FileDescriptor.cpp +++ b/components/core/src/clp/FileDescriptor.cpp @@ -60,4 +60,11 @@ auto FileDescriptor::get_size() const -> size_t { } return static_cast(stat_result.st_size); } + +auto FileDescriptor::try_fstat(struct stat& stat_buffer) const -> ErrorCode { + if (auto const return_value = fstat(m_fd, &stat_buffer); 0 != return_value) { + return ErrorCode_errno; + } + return ErrorCode_Success; +} } // namespace clp diff --git a/components/core/src/clp/FileDescriptor.hpp b/components/core/src/clp/FileDescriptor.hpp index a704326bf..287e0acf4 100644 --- a/components/core/src/clp/FileDescriptor.hpp +++ b/components/core/src/clp/FileDescriptor.hpp @@ -2,6 +2,7 @@ #define CLP_FILEDESCRIPTOR_HPP #include +#include #include #include @@ -83,6 +84,14 @@ class FileDescriptor { */ [[nodiscard]] auto get_open_mode() const -> OpenMode { return m_open_mode; } + /** + * Tries to stat the current file + * @param stat_buffer + * @return ErrorCode_errno on error + * @return ErrorCode_Success on success + */ + [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; + private: int m_fd{-1}; OpenMode m_open_mode; diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 8a7a2c0f9..46e991ff3 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -28,7 +28,7 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by buf += bytes_read; num_bytes_read += bytes_read; num_bytes_to_read -= bytes_read; - if (num_bytes_read == num_bytes_to_read) { + if (0 == num_bytes_to_read) { return ErrorCode_Success; } } @@ -56,11 +56,4 @@ auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { pos = static_cast(curr_offset); return ErrorCode_Success; } - -auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { - return ErrorCode_errno; - } - return ErrorCode_Success; -} } // namespace clp diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index c47133534..18efe0369 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -34,7 +34,7 @@ class SysFileReader : public ReaderInterface { // Methods [[nodiscard]] auto what() const noexcept -> char const* override { - return "FileReader operation failed"; + return "clp::SysFileReader operation failed"; } }; @@ -55,12 +55,18 @@ class SysFileReader : public ReaderInterface { // Methods implementing the ReaderInterface /** - * Tries to get the current position of the read head in the file - * @param pos Position of the read head in the file + * Tries to read up to a given number of bytes from the file + * @param buf + * @param num_bytes_to_read The number of bytes to try and read + * @param num_bytes_read The actual number of bytes read + * @return ErrorCode_BadParam if buf is invalid * @return ErrorCode_errno on error + * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Success on success */ - [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; + [[nodiscard]] auto + try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; + /** * Tries to seek from the beginning of the file to the given position * @param pos @@ -70,27 +76,24 @@ class SysFileReader : public ReaderInterface { [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; /** - * Tries to read up to a given number of bytes from the file - * @param buf - * @param num_bytes_to_read The number of bytes to try and read - * @param num_bytes_read The actual number of bytes read - * @return ErrorCode_BadParam if buf is invalid + * Tries to get the current position of the read head in the file + * @param pos Position of the read head in the file * @return ErrorCode_errno on error - * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Success on success */ - [[nodiscard]] auto - try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; + [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; + // Methods [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } /** * Tries to stat the current file * @param stat_buffer - * @return ErrorCode_errno on error - * @return ErrorCode_Success on success + * @return Same as FileDescriptor::try_fstat */ - [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; + [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode { + return m_fd.try_fstat(stat_buffer); + } private: std::string m_path; From ee41996281830367d71dd0db19b4ed65a764faa8 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:58:17 -0400 Subject: [PATCH 34/62] Use span to replace pointer arithmetic. --- components/core/src/clp/SysFileReader.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 46e991ff3..0d8a32aa2 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -5,9 +5,12 @@ #include #include +#include #include "ErrorCode.hpp" +using std::span; + namespace clp { auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode { @@ -16,16 +19,16 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by } num_bytes_read = 0; + span dst_view{buf, num_bytes_to_read}; while (true) { - auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); + auto const bytes_read = ::read(m_fd.get_raw_fd(), dst_view.data(), num_bytes_to_read); if (0 == bytes_read) { break; } if (bytes_read < 0) { return ErrorCode_errno; } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - buf += bytes_read; + dst_view = dst_view.subspan(num_bytes_to_read); num_bytes_read += bytes_read; num_bytes_to_read -= bytes_read; if (0 == num_bytes_to_read) { From d9b4de4bb96484867e49f593cbeaba3fa394e4d5 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:11:24 -0400 Subject: [PATCH 35/62] Use CLP array to avoid buffer warning --- components/core/tests/test-SysFileReader.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-SysFileReader.cpp index e9bef8dcd..8192aa309 100644 --- a/components/core/tests/test-SysFileReader.cpp +++ b/components/core/tests/test-SysFileReader.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -10,6 +9,9 @@ #include "../src/clp/FileReader.hpp" #include "../src/clp/ReaderInterface.hpp" #include "../src/clp/SysFileReader.hpp" +#include "../src/clp/Array.hpp" + +using clp::Array; namespace { // Reused code starts @@ -39,12 +41,11 @@ auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path { auto get_content(clp::ReaderInterface& reader, size_t read_buf_size) -> std::vector { std::vector buf; - // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) - auto const read_buf{std::make_unique(read_buf_size)}; + Array read_buf{read_buf_size}; for (bool has_more_content{true}; has_more_content;) { size_t num_bytes_read{}; - has_more_content = reader.read(read_buf.get(), read_buf_size, num_bytes_read); - std::string_view const view{read_buf.get(), num_bytes_read}; + has_more_content = reader.read(read_buf.data(), read_buf_size, num_bytes_read); + std::string_view const view{read_buf.data(), num_bytes_read}; buf.insert(buf.cend(), view.cbegin(), view.cend()); } return buf; From 707817a9bde60fa6edbbc9fdc0d6e262ebea6463 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:18:50 -0400 Subject: [PATCH 36/62] Linter --- components/core/tests/test-SysFileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-SysFileReader.cpp index 8192aa309..da6d1e80b 100644 --- a/components/core/tests/test-SysFileReader.cpp +++ b/components/core/tests/test-SysFileReader.cpp @@ -6,10 +6,10 @@ #include +#include "../src/clp/Array.hpp" #include "../src/clp/FileReader.hpp" #include "../src/clp/ReaderInterface.hpp" #include "../src/clp/SysFileReader.hpp" -#include "../src/clp/Array.hpp" using clp::Array; From 2a6248c507f916887dbe6cc5c719aceb979b71b7 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 21 Aug 2024 11:34:30 -0400 Subject: [PATCH 37/62] Apply suggestions from code review Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- components/core/src/clp/FileDescriptor.cpp | 2 +- components/core/src/clp/FileDescriptor.hpp | 8 ++++---- components/core/src/clp/SysFileReader.hpp | 9 ++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/components/core/src/clp/FileDescriptor.cpp b/components/core/src/clp/FileDescriptor.cpp index e40357651..88932060d 100644 --- a/components/core/src/clp/FileDescriptor.cpp +++ b/components/core/src/clp/FileDescriptor.cpp @@ -62,7 +62,7 @@ auto FileDescriptor::get_size() const -> size_t { } auto FileDescriptor::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - if (auto const return_value = fstat(m_fd, &stat_buffer); 0 != return_value) { + if (0 != fstat(m_fd, &stat_buffer)) { return ErrorCode_errno; } return ErrorCode_Success; diff --git a/components/core/src/clp/FileDescriptor.hpp b/components/core/src/clp/FileDescriptor.hpp index 287e0acf4..fcc40ef01 100644 --- a/components/core/src/clp/FileDescriptor.hpp +++ b/components/core/src/clp/FileDescriptor.hpp @@ -85,10 +85,10 @@ class FileDescriptor { [[nodiscard]] auto get_open_mode() const -> OpenMode { return m_open_mode; } /** - * Tries to stat the current file - * @param stat_buffer - * @return ErrorCode_errno on error - * @return ErrorCode_Success on success + * Obtains information about the open file associated with the underlying file descriptor. + * @param stat_buffer Returns the stat results. + * @return ErrorCode_Success on success. + * @return ErrorCode_errno on error. */ [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index 18efe0369..6fcfe694e 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -55,7 +55,7 @@ class SysFileReader : public ReaderInterface { // Methods implementing the ReaderInterface /** - * Tries to read up to a given number of bytes from the file + * Tries to read up to a given number of bytes from the file. * @param buf * @param num_bytes_to_read The number of bytes to try and read * @param num_bytes_read The actual number of bytes read @@ -68,7 +68,7 @@ class SysFileReader : public ReaderInterface { try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; /** - * Tries to seek from the beginning of the file to the given position + * Tries to seek to the given position, relative to the beginning of the file. * @param pos * @return ErrorCode_errno on error * @return ErrorCode_Success on success @@ -76,8 +76,7 @@ class SysFileReader : public ReaderInterface { [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; /** - * Tries to get the current position of the read head in the file - * @param pos Position of the read head in the file + @param pos Returns the position of the read head in the buffer. * @return ErrorCode_errno on error * @return ErrorCode_Success on success */ @@ -89,7 +88,7 @@ class SysFileReader : public ReaderInterface { /** * Tries to stat the current file * @param stat_buffer - * @return Same as FileDescriptor::try_fstat + * @return Same as `FileDescriptor::try_fstat` */ [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode { return m_fd.try_fstat(stat_buffer); From aff7a2e14817989d035d781bf2ad48b4b44a5a5a Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:19:56 -0400 Subject: [PATCH 38/62] Fixes --- components/core/src/clp/FileDescriptor.cpp | 4 ++-- components/core/src/clp/FileDescriptor.hpp | 2 +- components/core/src/clp/SysFileReader.cpp | 12 ++++-------- components/core/src/clp/SysFileReader.hpp | 8 ++++---- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/components/core/src/clp/FileDescriptor.cpp b/components/core/src/clp/FileDescriptor.cpp index 88932060d..a5c359477 100644 --- a/components/core/src/clp/FileDescriptor.cpp +++ b/components/core/src/clp/FileDescriptor.cpp @@ -50,7 +50,7 @@ FileDescriptor::~FileDescriptor() { auto FileDescriptor::get_size() const -> size_t { struct stat stat_result {}; - if (0 != fstat(m_fd, &stat_result)) { + if (0 != stat(stat_result)) { throw OperationFailed( ErrorCode_errno, __FILE__, @@ -61,7 +61,7 @@ auto FileDescriptor::get_size() const -> size_t { return static_cast(stat_result.st_size); } -auto FileDescriptor::try_fstat(struct stat& stat_buffer) const -> ErrorCode { +auto FileDescriptor::stat(struct stat& stat_buffer) const -> ErrorCode { if (0 != fstat(m_fd, &stat_buffer)) { return ErrorCode_errno; } diff --git a/components/core/src/clp/FileDescriptor.hpp b/components/core/src/clp/FileDescriptor.hpp index fcc40ef01..6770f3aef 100644 --- a/components/core/src/clp/FileDescriptor.hpp +++ b/components/core/src/clp/FileDescriptor.hpp @@ -90,7 +90,7 @@ class FileDescriptor { * @return ErrorCode_Success on success. * @return ErrorCode_errno on error. */ - [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; + [[nodiscard]] auto stat(struct stat& stat_buffer) const -> ErrorCode; private: int m_fd{-1}; diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp index 0d8a32aa2..31d99d7ff 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/SysFileReader.cpp @@ -20,22 +20,18 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by num_bytes_read = 0; span dst_view{buf, num_bytes_to_read}; - while (true) { - auto const bytes_read = ::read(m_fd.get_raw_fd(), dst_view.data(), num_bytes_to_read); + while (false == dst_view.empty()) { + auto const bytes_read = ::read(m_fd.get_raw_fd(), dst_view.data(), dst_view.size()); if (0 == bytes_read) { break; } if (bytes_read < 0) { return ErrorCode_errno; } - dst_view = dst_view.subspan(num_bytes_to_read); num_bytes_read += bytes_read; - num_bytes_to_read -= bytes_read; - if (0 == num_bytes_to_read) { - return ErrorCode_Success; - } + dst_view = dst_view.subspan(bytes_read); } - if (0 == num_bytes_read) { + if (dst_view.size() == num_bytes_to_read) { return ErrorCode_EndOfFile; } return ErrorCode_Success; diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp index 6fcfe694e..c5cfaddc1 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/SysFileReader.hpp @@ -86,12 +86,12 @@ class SysFileReader : public ReaderInterface { [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } /** - * Tries to stat the current file - * @param stat_buffer - * @return Same as `FileDescriptor::try_fstat` + * Obtains information about the open file associated with the underlying file descriptor. + * @param stat_buffer Returns the stat results. + * @return Same as `FileDescriptor::fstat` */ [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode { - return m_fd.try_fstat(stat_buffer); + return m_fd.stat(stat_buffer); } private: From f807eeab2f6c2be87431039b24abe644d45536e2 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:21:14 -0400 Subject: [PATCH 39/62] Rename class and slight modify the comments --- components/core/CMakeLists.txt | 6 ++-- ...ileReader.cpp => FileDescriptorReader.cpp} | 8 ++--- ...ileReader.hpp => FileDescriptorReader.hpp} | 30 +++++++++---------- ...ader.cpp => test-FileDescriptorReader.cpp} | 10 +++---- 4 files changed, 27 insertions(+), 27 deletions(-) rename components/core/src/clp/{SysFileReader.cpp => FileDescriptorReader.cpp} (82%) rename components/core/src/clp/{SysFileReader.hpp => FileDescriptorReader.hpp} (75%) rename components/core/tests/{test-SysFileReader.cpp => test-FileDescriptorReader.cpp} (87%) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 8dd8753ee..8121e3f73 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -345,6 +345,8 @@ set(SOURCE_FILES_unitTest src/clp/ffi/Value.hpp src/clp/FileDescriptor.cpp src/clp/FileDescriptor.hpp + src/clp/FileDescriptorReader.cpp + src/clp/FileDescriptorReader.hpp src/clp/FileReader.cpp src/clp/FileReader.hpp src/clp/FileWriter.cpp @@ -413,8 +415,6 @@ set(SOURCE_FILES_unitTest src/clp/SQLiteDB.hpp src/clp/SQLitePreparedStatement.cpp src/clp/SQLitePreparedStatement.hpp - src/clp/SysFileReader.cpp - src/clp/SysFileReader.hpp src/clp/Stopwatch.cpp src/clp/Stopwatch.hpp src/clp/streaming_archive/ArchiveMetadata.cpp @@ -483,6 +483,7 @@ set(SOURCE_FILES_unitTest tests/test-encoding_methods.cpp tests/test-ffi_KeyValuePairLogEvent.cpp tests/test-ffi_SchemaTree.cpp + tests/test-FileDescriptorReader.cpp tests/test-Grep.cpp tests/test-hash_utils.cpp tests/test-ir_encoding_methods.cpp @@ -501,7 +502,6 @@ set(SOURCE_FILES_unitTest tests/test-Stopwatch.cpp tests/test-StreamingCompression.cpp tests/test-string_utils.cpp - tests/test-SysFileReader.cpp tests/test-TimestampPattern.cpp tests/test-utf8_utils.cpp tests/test-Utils.cpp diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/FileDescriptorReader.cpp similarity index 82% rename from components/core/src/clp/SysFileReader.cpp rename to components/core/src/clp/FileDescriptorReader.cpp index 31d99d7ff..4beb926a9 100644 --- a/components/core/src/clp/SysFileReader.cpp +++ b/components/core/src/clp/FileDescriptorReader.cpp @@ -1,4 +1,4 @@ -#include "SysFileReader.hpp" +#include "FileDescriptorReader.hpp" #include #include @@ -12,7 +12,7 @@ using std::span; namespace clp { -auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) +auto FileDescriptorReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode { if (nullptr == buf) { return ErrorCode_BadParam; @@ -37,7 +37,7 @@ auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_by return ErrorCode_Success; } -auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { +auto FileDescriptorReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); static_cast(-1) == offset) { @@ -47,7 +47,7 @@ auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { return ErrorCode_Success; } -auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { +auto FileDescriptorReader::try_get_pos(size_t& pos) -> ErrorCode { auto const curr_offset = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); if (static_cast(-1) == curr_offset) { return ErrorCode_errno; diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/FileDescriptorReader.hpp similarity index 75% rename from components/core/src/clp/SysFileReader.hpp rename to components/core/src/clp/FileDescriptorReader.hpp index c5cfaddc1..3978d63bf 100644 --- a/components/core/src/clp/SysFileReader.hpp +++ b/components/core/src/clp/FileDescriptorReader.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_SYSFILEREADER_HPP -#define CLP_SYSFILEREADER_HPP +#ifndef CLP_FILEDESCRIPTORREADER_HPP +#define CLP_FILEDESCRIPTORREADER_HPP #include @@ -15,15 +15,15 @@ namespace clp { /** - * Class for performing reads from an on-disk file directly using C style system call. - * Unlike reader classes using `FILE` stream interface, This class operates on raw file descriptor - * and does not internally buffer any data. Instead, the user of this class is expected to buffer - * and read the data efficiently. + * Class for performing reads from an on-disk file directly using clp::FileDescriptor and C style + * system call. Unlike clp::FileReader that relies on `FILE` stream interface to buffer read data, + * This class does not internally buffer any data. Instead, the user of this class is expected to + * buffer and read the data efficiently. * * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use * `FileReader` instead. */ -class SysFileReader : public ReaderInterface { +class FileDescriptorReader : public ReaderInterface { public: // Types class OperationFailed : public TraceableException { @@ -34,24 +34,24 @@ class SysFileReader : public ReaderInterface { // Methods [[nodiscard]] auto what() const noexcept -> char const* override { - return "clp::SysFileReader operation failed"; + return "clp::FileDescriptorReader operation failed"; } }; - explicit SysFileReader(std::string path) + explicit FileDescriptorReader(std::string path) : m_path{std::move(path)}, m_fd{m_path, FileDescriptor::OpenMode::ReadOnly} {} // Explicitly disable copy constructor and assignment operator - SysFileReader(SysFileReader const&) = delete; - auto operator=(SysFileReader const&) -> SysFileReader& = delete; + FileDescriptorReader(FileDescriptorReader const&) = delete; + auto operator=(FileDescriptorReader const&) -> FileDescriptorReader& = delete; // Explicitly disable move constructor and assignment operator - SysFileReader(SysFileReader&&) = delete; - auto operator=(SysFileReader&&) -> SysFileReader& = delete; + FileDescriptorReader(FileDescriptorReader&&) = delete; + auto operator=(FileDescriptorReader&&) -> FileDescriptorReader& = delete; // Destructor - ~SysFileReader() override = default; + ~FileDescriptorReader() override = default; // Methods implementing the ReaderInterface /** @@ -100,4 +100,4 @@ class SysFileReader : public ReaderInterface { }; } // namespace clp -#endif // CLP_SYSFILEREADER_HPP +#endif // CLP_FILEDESCRIPTORREADER_HPP diff --git a/components/core/tests/test-SysFileReader.cpp b/components/core/tests/test-FileDescriptorReader.cpp similarity index 87% rename from components/core/tests/test-SysFileReader.cpp rename to components/core/tests/test-FileDescriptorReader.cpp index da6d1e80b..eba592e85 100644 --- a/components/core/tests/test-SysFileReader.cpp +++ b/components/core/tests/test-FileDescriptorReader.cpp @@ -7,9 +7,9 @@ #include #include "../src/clp/Array.hpp" +#include "../src/clp/FileDescriptorReader.hpp" #include "../src/clp/FileReader.hpp" #include "../src/clp/ReaderInterface.hpp" -#include "../src/clp/SysFileReader.hpp" using clp::Array; @@ -54,16 +54,16 @@ auto get_content(clp::ReaderInterface& reader, size_t read_buf_size) -> std::vec // Reused code ends -TEST_CASE("sys_file_reader_basic", "[SysFileReader]") { +TEST_CASE("file_descriptor_reader_basic", "[FileDescriptorReader]") { clp::FileReader ref_reader{get_test_input_local_path()}; auto const expected{get_content(ref_reader)}; - clp::SysFileReader reader{get_test_input_local_path()}; + clp::FileDescriptorReader reader{get_test_input_local_path()}; auto const actual{get_content(reader)}; REQUIRE((actual == expected)); } -TEST_CASE("sys_file_reader_with_offset_and_seek", "[SysFileReader]") { +TEST_CASE("file_descriptor_reader_with_offset_and_seek", "[FileDescriptorReader]") { constexpr size_t cOffset{319}; clp::FileReader ref_reader{get_test_input_local_path()}; @@ -71,7 +71,7 @@ TEST_CASE("sys_file_reader_with_offset_and_seek", "[SysFileReader]") { auto const expected{get_content(ref_reader)}; auto const ref_end_pos{ref_reader.get_pos()}; - clp::SysFileReader reader(get_test_input_local_path()); + clp::FileDescriptorReader reader(get_test_input_local_path()); reader.seek_from_begin(cOffset); auto const actual{get_content(reader)}; auto const actual_end_pos{reader.get_pos()}; From 435e12a7bf3eb28ce56f0647f5b7dc0a635ad637 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:26:44 -0400 Subject: [PATCH 40/62] small touch --- components/core/src/clp/FileDescriptorReader.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/core/src/clp/FileDescriptorReader.hpp b/components/core/src/clp/FileDescriptorReader.hpp index 3978d63bf..51d7f97ec 100644 --- a/components/core/src/clp/FileDescriptorReader.hpp +++ b/components/core/src/clp/FileDescriptorReader.hpp @@ -15,9 +15,9 @@ namespace clp { /** - * Class for performing reads from an on-disk file directly using clp::FileDescriptor and C style - * system call. Unlike clp::FileReader that relies on `FILE` stream interface to buffer read data, - * This class does not internally buffer any data. Instead, the user of this class is expected to + * Class for performing direct reads from an on-disk file using clp::FileDescriptor and C-style + * system call. Unlike clp::FileReader, which uses on `FILE` stream interface to buffer read data, + * this class does not buffer data internally. Instead, the user of this class is expected to * buffer and read the data efficiently. * * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use From 0c99da6769df6cc138b84790ce6cee3f4fc0a892 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:44:46 -0400 Subject: [PATCH 41/62] missing changes in merge --- components/core/CMakeLists.txt | 2 - components/core/src/clp/SysFileReader.cpp | 67 ------------- components/core/src/clp/SysFileReader.hpp | 93 ------------------- components/core/src/clp/clp/CMakeLists.txt | 2 - .../core/tests/test-BufferedFileReader.cpp | 4 +- 5 files changed, 2 insertions(+), 166 deletions(-) delete mode 100644 components/core/src/clp/SysFileReader.cpp delete mode 100644 components/core/src/clp/SysFileReader.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index aac60678e..8121e3f73 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -454,8 +454,6 @@ set(SOURCE_FILES_unitTest src/clp/streaming_compression/zstd/Decompressor.hpp src/clp/StringReader.cpp src/clp/StringReader.hpp - src/clp/SysFileReader.cpp - src/clp/SysFileReader.hpp src/clp/Thread.cpp src/clp/Thread.hpp src/clp/time_types.hpp diff --git a/components/core/src/clp/SysFileReader.cpp b/components/core/src/clp/SysFileReader.cpp deleted file mode 100644 index 67df6e93d..000000000 --- a/components/core/src/clp/SysFileReader.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include "SysFileReader.hpp" - -#include -#include -#include -#include - -#include - -using std::string; - -namespace clp { -auto SysFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) - -> ErrorCode { - if (nullptr == buf) { - return ErrorCode_BadParam; - } - - num_bytes_read = 0; - while (true) { - auto const bytes_read = ::read(m_fd.get_raw_fd(), buf, num_bytes_to_read); - if (0 == bytes_read) { - break; - } - if (bytes_read < 0) { - return ErrorCode_errno; - } - - buf += bytes_read; - num_bytes_read += bytes_read; - num_bytes_to_read -= bytes_read; - if (num_bytes_read == num_bytes_to_read) { - return ErrorCode_Success; - } - } - if (0 == num_bytes_read) { - return ErrorCode_EndOfFile; - } - return ErrorCode_Success; -} - -auto SysFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { - if (auto const offset = lseek(m_fd.get_raw_fd(), static_cast(pos), SEEK_SET); - -1 == offset) - { - return ErrorCode_errno; - } - - return ErrorCode_Success; -} - -auto SysFileReader::try_get_pos(size_t& pos) -> ErrorCode { - pos = lseek(m_fd.get_raw_fd(), 0, SEEK_CUR); - if (static_cast(-1) == pos) { - return ErrorCode_errno; - } - - return ErrorCode_Success; -} - -auto SysFileReader::try_fstat(struct stat& stat_buffer) const -> ErrorCode { - if (auto const return_value = fstat(m_fd.get_raw_fd(), &stat_buffer); 0 != return_value) { - return ErrorCode_errno; - } - return ErrorCode_Success; -} -} // namespace clp diff --git a/components/core/src/clp/SysFileReader.hpp b/components/core/src/clp/SysFileReader.hpp deleted file mode 100644 index 0026d75eb..000000000 --- a/components/core/src/clp/SysFileReader.hpp +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef CLP_SYSFILEREADER_HPP -#define CLP_SYSFILEREADER_HPP - -#include - -#include -#include -#include - -#include "ErrorCode.hpp" -#include "FileDescriptor.hpp" -#include "ReaderInterface.hpp" -#include "TraceableException.hpp" - -namespace clp { -/** - * Class for performing reads from an on-disk file directly using C style system call. - * Unlike reader classes using FILE stream interface, This class operates on raw fd and does not - * internally buffer any data. Instead, the user of this class is expected to buffer and read the - * data efficiently. - * - * Note: If you don't plan to handle the data buffering yourself, do not use this class. Use - * FileReader instead. - */ -class SysFileReader : public ReaderInterface { -public: - // Types - class OperationFailed : public TraceableException { - public: - // Constructors - OperationFailed(ErrorCode error_code, char const* const filename, int line_number) - : TraceableException(error_code, filename, line_number) {} - - // Methods - char const* what() const noexcept override { return "FileReader operation failed"; } - }; - - explicit SysFileReader(std::string path) - : m_path{std::move(path)}, - m_fd{m_path, FileDescriptor::OpenMode::ReadOnly} {} - - // Explicitly disable copy and move constructor and assignment operator - SysFileReader(SysFileReader const&) = delete; - SysFileReader& operator=(SysFileReader const&) = delete; - SysFileReader(SysFileReader&&) = delete; - SysFileReader& operator=(SysFileReader&&) = delete; - - // Methods implementing the ReaderInterface - /** - * Tries to get the current position of the read head in the file - * @param pos Position of the read head in the file - * @return ErrorCode_errno on error - * @return ErrorCode_Success on success - */ - [[nodiscard]] auto try_get_pos(size_t& pos) -> ErrorCode override; - /** - * Tries to seek from the beginning of the file to the given position - * @param pos - * @return ErrorCode_errno on error - * @return ErrorCode_Success on success - */ - [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; - - /** - * Tries to read up to a given number of bytes from the file - * @param buf - * @param num_bytes_to_read The number of bytes to try and read - * @param num_bytes_read The actual number of bytes read - * @return ErrorCode_BadParam if buf is invalid - * @return ErrorCode_errno on error - * @return ErrorCode_EndOfFile on EOF - * @return ErrorCode_Success on success - */ - [[nodiscard]] auto - try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override; - - [[nodiscard]] auto get_path() const -> std::string_view { return m_path; } - - /** - * Tries to stat the current file - * @param stat_buffer - * @return ErrorCode_errno on error - * @return ErrorCode_Success on success - */ - [[nodiscard]] auto try_fstat(struct stat& stat_buffer) const -> ErrorCode; - -private: - std::string m_path; - FileDescriptor m_fd; -}; -} // namespace clp - -#endif // CLP_SYSFILEREADER_HPP diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index 483b8fb32..53342f3a9 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -130,8 +130,6 @@ set( ../streaming_compression/zstd/Decompressor.hpp ../StringReader.cpp ../StringReader.hpp - ../SysFileReader.cpp - ../SysFileReader.hpp ../time_types.hpp ../TimestampPattern.cpp ../TimestampPattern.hpp diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 72b7acceb..f29714cff 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -4,19 +4,19 @@ #include #include "../src/clp/BufferedFileReader.hpp" +#include "../src/clp/FileDescriptorReader.hpp" #include "../src/clp/FileReader.hpp" #include "../src/clp/FileWriter.hpp" -#include "../src/clp/FileDescriptorReader.hpp" using clp::BufferedFileReader; using clp::ErrorCode; using clp::ErrorCode_EndOfFile; using clp::ErrorCode_Success; using clp::ErrorCode_Unsupported; +using clp::FileDescriptorReader; using clp::FileReader; using clp::FileWriter; using clp::ReaderInterface; -using clp::FileDescriptorReader; using std::make_unique; using std::string; From fd4eb366f40652fbe6e7ea95d4f06b6b43d5b224 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:38:14 -0400 Subject: [PATCH 42/62] Fixes --- .../core/src/clp/BufferedFileReader.cpp | 57 +++++++++++-------- .../core/src/clp/BufferedFileReader.hpp | 7 ++- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 347da3e3c..6483225f4 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -1,14 +1,22 @@ #include "BufferedFileReader.hpp" -#include - +#include #include - -#include - +#include +#include +#include +#include +#include + +#include "BufferReader.hpp" +#include "ErrorCode.hpp" #include "math_utils.hpp" +#include "ReaderInterface.hpp" +using std::make_unique; +using std::span; using std::string; +using std::unique_ptr; namespace clp { namespace { @@ -61,7 +69,7 @@ BufferedFileReader::BufferedFileReader( } m_file_pos = 0; m_buffer_begin_pos = 0; - m_buffer_reader.emplace(m_buffer.data(), 0); + m_buffer_reader = make_unique(m_buffer.data(), 0); m_highest_read_pos = 0; } @@ -141,7 +149,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { { return error_code; } - m_buffer_reader.emplace(m_buffer.data(), 0); + m_buffer_reader = make_unique(m_buffer.data(), 0); m_buffer_begin_pos = pos; } else { auto const num_bytes_to_refill = pos - get_buffer_end_pos(); @@ -173,31 +181,28 @@ auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& n return ErrorCode_Success; } + span dst_view{buf, num_bytes_to_read}; num_bytes_read = 0; - while (true) { + while (false == dst_view.empty()) { size_t bytes_read{0}; - auto error_code = m_buffer_reader->try_read(buf, num_bytes_to_read, bytes_read); + auto error_code = m_buffer_reader->try_read(dst_view.data(), dst_view.size(), bytes_read); if (ErrorCode_Success == error_code) { - buf += bytes_read; + dst_view = dst_view.subspan(bytes_read); num_bytes_read += bytes_read; - num_bytes_to_read -= bytes_read; update_file_pos(m_file_pos + bytes_read); - if (0 == num_bytes_to_read) { + } else if (ErrorCode_EndOfFile == error_code) { + error_code = refill_reader_buffer(m_base_buffer_size); + if (ErrorCode_EndOfFile == error_code) { break; } - } else if (ErrorCode_EndOfFile != error_code) { - return error_code; - } - - error_code = refill_reader_buffer(m_base_buffer_size); - if (ErrorCode_EndOfFile == error_code) { - break; - } - if (ErrorCode_Success != error_code) { + if (ErrorCode_Success != error_code) { + return error_code; + } + } else { return error_code; } } - if (0 == num_bytes_read) { + if (dst_view.size() == num_bytes_to_read) { return ErrorCode_EndOfFile; } return ErrorCode_Success; @@ -287,7 +292,11 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err return error_code; } // NOTE: We still want to set the buffer reader if no bytes were read on EOF - m_buffer_reader.emplace(m_buffer.data(), next_buffer_pos + num_bytes_read, next_buffer_pos); + m_buffer_reader = make_unique( + m_buffer.data(), + next_buffer_pos + num_bytes_read, + next_buffer_pos + ); m_buffer_begin_pos = next_buffer_begin_pos; return error_code; } @@ -301,7 +310,7 @@ auto BufferedFileReader::drop_content_before_current_pos() -> void { m_buffer.resize(new_buffer_size); m_buffer_begin_pos += buffer_reader_pos; - m_buffer_reader.emplace(m_buffer.data(), new_data_size); + m_buffer_reader = make_unique(m_buffer.data(), new_data_size); } auto BufferedFileReader::update_file_pos(size_t pos) -> void { diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index ed17211b3..1403bb057 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -1,9 +1,11 @@ #ifndef CLP_BUFFEREDFILEREADER_HPP #define CLP_BUFFEREDFILEREADER_HPP +#include #include #include #include +#include #include #include "BufferReader.hpp" @@ -77,6 +79,9 @@ class BufferedFileReader : public ReaderInterface { BufferedFileReader(std::unique_ptr reader_interface) : BufferedFileReader(std::move(reader_interface), cDefaultBufferSize) {} + // Destructor + ~BufferedFileReader() override = default; + // Disable copy/move construction/assignment BufferedFileReader(BufferedFileReader const&) = delete; BufferedFileReader(BufferedFileReader&&) = delete; @@ -228,7 +233,7 @@ class BufferedFileReader : public ReaderInterface { // Buffer specific data std::vector m_buffer; size_t m_base_buffer_size; - std::optional m_buffer_reader; + std::unique_ptr m_buffer_reader; size_t m_buffer_begin_pos{0}; // Variables for checkpoint support From 5c301d7fb2a9b9bbe29b32855841c5863757b5e9 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 22 Aug 2024 16:13:53 -0400 Subject: [PATCH 43/62] A few more fixes --- .../core/src/clp/clp/FileCompressor.cpp | 2 ++ .../core/src/clp/clp/FileCompressor.hpp | 21 ++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index c676e93a8..ade077a70 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include diff --git a/components/core/src/clp/clp/FileCompressor.hpp b/components/core/src/clp/clp/FileCompressor.hpp index 24949fb99..ac5a075d3 100644 --- a/components/core/src/clp/clp/FileCompressor.hpp +++ b/components/core/src/clp/clp/FileCompressor.hpp @@ -54,6 +54,17 @@ class FileCompressor { static constexpr size_t cUtfMaxValidationLen = 4096; // Methods + /** + * Parses and encodes content from the given reader into the given archive_writer. + * @param target_data_size_of_dicts + * @param archive_user_config + * @param target_encoded_file_size + * @param path_for_compression + * @param group_id + * @param archive_writer + * @param reader + * @param use_heuristic + */ auto parse_and_encode( size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config, @@ -65,16 +76,6 @@ class FileCompressor { bool use_heuristic ) -> void; - /** - * Parses and encodes content from the given reader into the given archive_writer - * @param target_data_size_of_dicts - * @param archive_user_config - * @param target_encoded_file_size - * @param path_for_compression - * @param group_id - * @param archive_writer - * @param reader - */ void parse_and_encode_with_library( size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config, From ce7376bf24d80beeb3f25dadd63c13f65b00315f Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 22 Aug 2024 16:38:43 -0400 Subject: [PATCH 44/62] A small change --- .../core/src/clp/BufferedFileReader.cpp | 6 +-- .../core/tests/test-BufferedFileReader.cpp | 41 ++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 6483225f4..6df61c457 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -63,11 +63,7 @@ BufferedFileReader::BufferedFileReader( m_base_buffer_size = base_buffer_size; m_buffer.resize(m_base_buffer_size); - // TODO: anyway to get rid of this? - if (0 != m_file_reader->get_pos()) { - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - m_file_pos = 0; + m_file_pos = m_file_reader->get_pos(); m_buffer_begin_pos = 0; m_buffer_reader = make_unique(m_buffer.data(), 0); m_highest_read_pos = 0; diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index f29714cff..83375fcba 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -16,7 +16,6 @@ using clp::ErrorCode_Unsupported; using clp::FileDescriptorReader; using clp::FileReader; using clp::FileWriter; -using clp::ReaderInterface; using std::make_unique; using std::string; @@ -297,3 +296,43 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { boost::filesystem::remove(test_file_path); } + +TEST_CASE("Test reading with beginning offset", "[BufferedFileReader]") { + // Initialize data for testing + size_t const test_data_size = 1L * 1024 * 1024 + 1; // 1MB + size_t const cReaderBeginOffset = 6; + auto test_data_uniq_ptr = make_unique>(); + auto& test_data = *test_data_uniq_ptr; + for (size_t i = 0; i < test_data.size(); ++i) { + test_data[i] = static_cast('a' + (std::rand() % (cNumAlphabets))); + } + + // Write to test file + string const test_file_path{"BufferedFileReader.offset.test"}; + FileWriter file_writer; + file_writer.open(test_file_path, FileWriter::OpenMode::CREATE_FOR_WRITING); + file_writer.write(test_data.data(), test_data_size); + file_writer.close(); + + // Instantiate BufferedFileReader and the reference FileReader from a non-zero pos + auto fd_reader = make_unique(test_file_path); + fd_reader->seek_from_begin(cReaderBeginOffset); + BufferedFileReader reader{std::move(fd_reader)}; + + FileReader ref_file_reader{test_file_path}; + ref_file_reader.seek_from_begin(cReaderBeginOffset); + + ErrorCode error_code{ErrorCode_Success}; + auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); + string ref_string; + string test_string; + // Validate that a FileReader and a BufferedFileReader return the same strings. + while (ErrorCode_EndOfFile != error_code) { + error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); + auto error_code2 = reader.try_read_to_delimiter(delimiter, true, false, test_string); + REQUIRE(error_code2 == error_code); + REQUIRE(test_string == ref_string); + } + + boost::filesystem::remove(test_file_path); +} From 1bbf2a4c2948f71cb9155cb6b5ba60e8662cf059 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:46:22 -0400 Subject: [PATCH 45/62] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- components/core/src/clp/clp/FileCompressor.cpp | 2 +- components/core/tests/test-BufferedFileReader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index ade077a70..c0842c13f 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -126,7 +126,7 @@ bool FileCompressor::compress_file( PROFILER_SPDLOG_INFO("Start parsing {}", file_name) Profiler::start_continuous_measurement(); - BufferedFileReader buffered_file_reader(make_unique(file_to_compress.get_path())); + BufferedFileReader buffered_file_reader{make_unique(file_to_compress.get_path())}; // Check that file is UTF-8 encoded if (auto error_code = buffered_file_reader.try_refill_buffer_if_empty(); diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 83375fcba..e946ad198 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -40,7 +40,7 @@ TEST_CASE("Test reading data", "[BufferedFileReader]") { auto read_buf_uniq_ptr = make_unique>(); auto& read_buf = *read_buf_uniq_ptr; size_t const base_buffer_size = BufferedFileReader::cMinBufferSize << 4; - BufferedFileReader reader(make_unique(test_file_path), base_buffer_size); + BufferedFileReader reader{make_unique(test_file_path), base_buffer_size}; size_t num_bytes_read{0}; size_t buf_pos{0}; From 1260043c19c2d172874bd6021607b3d96c513f56 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 9 Sep 2024 11:03:39 -0400 Subject: [PATCH 46/62] small fixes --- components/core/src/clp/BufferedFileReader.hpp | 6 +++++- components/core/tests/test-BufferedFileReader.cpp | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 1403bb057..a87fc217e 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -30,6 +30,9 @@ namespace clp { * * NOTE 2: This class restricts the buffer size to a multiple of the page size and we avoid reading * anything less than a page to avoid multiple page faults. + * + * NOTE 3: Although the FILE stream interface provided by glibc also performs buffered reads, it + * does not allow us to control the buffering. */ class BufferedFileReader : public ReaderInterface { public: @@ -68,6 +71,7 @@ class BufferedFileReader : public ReaderInterface { // Constructors /** + * @param reader_interface * @param base_buffer_size The size for the fixed-size buffer used when no checkpoint is set. It * must be a multiple of BufferedFileReader::cMinBufferSize. */ @@ -76,7 +80,7 @@ class BufferedFileReader : public ReaderInterface { size_t base_buffer_size ); - BufferedFileReader(std::unique_ptr reader_interface) + explicit BufferedFileReader(std::unique_ptr reader_interface) : BufferedFileReader(std::move(reader_interface), cDefaultBufferSize) {} // Destructor diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index e946ad198..8b8c63e09 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -277,7 +277,7 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_writer.write(test_data.data(), test_data_size); file_writer.close(); - BufferedFileReader reader(make_unique(test_file_path)); + BufferedFileReader reader{make_unique(test_file_path)}; string test_string; FileReader ref_file_reader{test_file_path}; From aaa8c64956856220b38facf644684ba81aa02edc Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 9 Sep 2024 11:18:28 -0400 Subject: [PATCH 47/62] allow BufferReader to support null initialization --- components/core/src/clp/BufferReader.cpp | 2 +- components/core/src/clp/BufferReader.hpp | 2 ++ .../core/src/clp/BufferedFileReader.cpp | 30 +++++++++---------- .../core/src/clp/BufferedFileReader.hpp | 5 ++-- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/components/core/src/clp/BufferReader.cpp b/components/core/src/clp/BufferReader.cpp index b116b8080..c50b4e028 100644 --- a/components/core/src/clp/BufferReader.cpp +++ b/components/core/src/clp/BufferReader.cpp @@ -5,7 +5,7 @@ namespace clp { BufferReader::BufferReader(char const* data, size_t data_size, size_t pos) { - if (nullptr == data) { + if (nullptr == data && data_size != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } m_internal_buf = data; diff --git a/components/core/src/clp/BufferReader.hpp b/components/core/src/clp/BufferReader.hpp index 349697d34..4a64502ce 100644 --- a/components/core/src/clp/BufferReader.hpp +++ b/components/core/src/clp/BufferReader.hpp @@ -23,6 +23,8 @@ class BufferReader : public ReaderInterface { }; // Constructors + BufferReader() : BufferReader(nullptr, 0, 0) {} + BufferReader(char const* data, size_t data_size) : BufferReader(data, data_size, 0) {} BufferReader(char const* data, size_t data_size, size_t pos); diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 6df61c457..d5b740992 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -65,12 +65,12 @@ BufferedFileReader::BufferedFileReader( m_file_pos = m_file_reader->get_pos(); m_buffer_begin_pos = 0; - m_buffer_reader = make_unique(m_buffer.data(), 0); + m_buffer_reader = BufferReader{m_buffer.data(), 0}; m_highest_read_pos = 0; } auto BufferedFileReader::try_refill_buffer_if_empty() -> ErrorCode { - if (m_buffer_reader->get_buffer_size() > 0) { + if (m_buffer_reader.get_buffer_size() > 0) { return ErrorCode_Success; } return refill_reader_buffer(m_base_buffer_size); @@ -85,7 +85,7 @@ void BufferedFileReader::refill_buffer_if_empty() { auto BufferedFileReader::try_peek_buffered_data(char const*& buf, size_t& peek_size) const -> ErrorCode { - m_buffer_reader->peek_buffer(buf, peek_size); + m_buffer_reader.peek_buffer(buf, peek_size); return ErrorCode_Success; } @@ -98,7 +98,7 @@ void BufferedFileReader::peek_buffered_data(char const*& buf, size_t& peek_size) auto BufferedFileReader::set_checkpoint() -> size_t { if (m_checkpoint_pos.has_value() && m_checkpoint_pos < m_file_pos - && m_buffer_reader->get_buffer_size() != m_base_buffer_size) + && m_buffer_reader.get_buffer_size() != m_base_buffer_size) { drop_content_before_current_pos(); } @@ -135,7 +135,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { return ErrorCode_Unsupported; } - auto error_code = m_buffer_reader->try_seek_from_begin(get_buffer_relative_pos(pos)); + auto error_code = m_buffer_reader.try_seek_from_begin(get_buffer_relative_pos(pos)); if (ErrorCode_Truncated == error_code) { if (false == m_checkpoint_pos.has_value()) { // If checkpoint is not set, simply move the file_pos and invalidate @@ -145,7 +145,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { { return error_code; } - m_buffer_reader = make_unique(m_buffer.data(), 0); + m_buffer_reader = BufferReader{m_buffer.data(), 0}; m_buffer_begin_pos = pos; } else { auto const num_bytes_to_refill = pos - get_buffer_end_pos(); @@ -156,7 +156,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (ErrorCode_Success != error_code) { return error_code; } - error_code = m_buffer_reader->try_seek_from_begin(get_buffer_relative_pos(pos)); + error_code = m_buffer_reader.try_seek_from_begin(get_buffer_relative_pos(pos)); if (ErrorCode_Success != error_code) { return error_code; } @@ -181,7 +181,7 @@ auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& n num_bytes_read = 0; while (false == dst_view.empty()) { size_t bytes_read{0}; - auto error_code = m_buffer_reader->try_read(dst_view.data(), dst_view.size(), bytes_read); + auto error_code = m_buffer_reader.try_read(dst_view.data(), dst_view.size(), bytes_read); if (ErrorCode_Success == error_code) { dst_view = dst_view.subspan(bytes_read); num_bytes_read += bytes_read; @@ -217,7 +217,7 @@ auto BufferedFileReader::try_read_to_delimiter( size_t total_num_bytes_read{0}; while (true) { size_t num_bytes_read{0}; - if (auto ret_code = m_buffer_reader->try_read_to_delimiter( + if (auto ret_code = m_buffer_reader.try_read_to_delimiter( delim, keep_delimiter, str, @@ -250,7 +250,7 @@ auto BufferedFileReader::try_read_to_delimiter( auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> ErrorCode { auto const buffer_end_pos = get_buffer_end_pos(); - auto const data_size = m_buffer_reader->get_buffer_size(); + auto const data_size = m_buffer_reader.get_buffer_size(); auto const available_buffer_space = m_buffer.size() - data_size; size_t num_bytes_to_read{0}; @@ -288,25 +288,25 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err return error_code; } // NOTE: We still want to set the buffer reader if no bytes were read on EOF - m_buffer_reader = make_unique( + m_buffer_reader = BufferReader{ m_buffer.data(), next_buffer_pos + num_bytes_read, next_buffer_pos - ); + }; m_buffer_begin_pos = next_buffer_begin_pos; return error_code; } auto BufferedFileReader::drop_content_before_current_pos() -> void { - auto buffer_reader_pos = m_buffer_reader->get_pos(); - auto const new_data_size = m_buffer_reader->get_buffer_size() - buffer_reader_pos; + auto buffer_reader_pos = m_buffer_reader.get_pos(); + auto const new_data_size = m_buffer_reader.get_buffer_size() - buffer_reader_pos; auto const new_buffer_size = int_round_up_to_multiple(new_data_size, m_base_buffer_size); m_buffer.erase(m_buffer.begin(), m_buffer.begin() + static_cast(buffer_reader_pos)); m_buffer.resize(new_buffer_size); m_buffer_begin_pos += buffer_reader_pos; - m_buffer_reader = make_unique(m_buffer.data(), new_data_size); + m_buffer_reader = BufferReader{m_buffer.data(), new_data_size}; } auto BufferedFileReader::update_file_pos(size_t pos) -> void { diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index a87fc217e..05583205a 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -8,6 +8,7 @@ #include #include +#include "Array.hpp" #include "BufferReader.hpp" #include "ErrorCode.hpp" #include "ReaderInterface.hpp" @@ -222,7 +223,7 @@ class BufferedFileReader : public ReaderInterface { } [[nodiscard]] auto get_buffer_end_pos() const -> size_t { - return m_buffer_begin_pos + m_buffer_reader->get_buffer_size(); + return m_buffer_begin_pos + m_buffer_reader.get_buffer_size(); } auto update_file_pos(size_t pos) -> void; @@ -237,7 +238,7 @@ class BufferedFileReader : public ReaderInterface { // Buffer specific data std::vector m_buffer; size_t m_base_buffer_size; - std::unique_ptr m_buffer_reader; + BufferReader m_buffer_reader; size_t m_buffer_begin_pos{0}; // Variables for checkpoint support From c147b411b6dc0777da9edc9bfd9b1df91157c7aa Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 9 Sep 2024 11:33:20 -0400 Subject: [PATCH 48/62] A few more polishing --- components/core/src/clp/BufferReader.cpp | 12 ++++++------ components/core/src/clp/BufferedFileReader.cpp | 7 ++----- components/core/src/clp/BufferedFileReader.hpp | 2 +- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/components/core/src/clp/BufferReader.cpp b/components/core/src/clp/BufferReader.cpp index c50b4e028..0dfe58c9d 100644 --- a/components/core/src/clp/BufferReader.cpp +++ b/components/core/src/clp/BufferReader.cpp @@ -4,18 +4,18 @@ #include namespace clp { -BufferReader::BufferReader(char const* data, size_t data_size, size_t pos) { - if (nullptr == data && data_size != 0) { +BufferReader::BufferReader(char const* data, size_t data_size, size_t pos) + : m_internal_buf{data}, + m_internal_buf_size{data_size}, + m_internal_buf_pos{pos} { + if (nullptr == data && (data_size != 0 || pos != 0)) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - m_internal_buf = data; - m_internal_buf_size = data_size; - m_internal_buf_pos = pos; } auto BufferReader::peek_buffer(char const*& buf, size_t& peek_size) const -> void { peek_size = get_remaining_data_size(); - buf = m_internal_buf + m_internal_buf_pos; + buf = 0 == peek_size ? nullptr : m_internal_buf + m_internal_buf_pos; } auto BufferReader::try_read_to_delimiter( diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index d5b740992..a9662cad1 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -288,11 +288,8 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err return error_code; } // NOTE: We still want to set the buffer reader if no bytes were read on EOF - m_buffer_reader = BufferReader{ - m_buffer.data(), - next_buffer_pos + num_bytes_read, - next_buffer_pos - }; + m_buffer_reader + = BufferReader{m_buffer.data(), next_buffer_pos + num_bytes_read, next_buffer_pos}; m_buffer_begin_pos = next_buffer_begin_pos; return error_code; } diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 05583205a..f719365c5 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -81,7 +81,7 @@ class BufferedFileReader : public ReaderInterface { size_t base_buffer_size ); - explicit BufferedFileReader(std::unique_ptr reader_interface) + explicit BufferedFileReader(std::unique_ptr reader_interface) : BufferedFileReader(std::move(reader_interface), cDefaultBufferSize) {} // Destructor From 02b0d10bdc996387cbf26d7bd65d118f4187b06f Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 9 Sep 2024 11:37:56 -0400 Subject: [PATCH 49/62] A few more polishing again --- components/core/src/clp/BufferedFileReader.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index f719365c5..065052edc 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -82,7 +82,7 @@ class BufferedFileReader : public ReaderInterface { ); explicit BufferedFileReader(std::unique_ptr reader_interface) - : BufferedFileReader(std::move(reader_interface), cDefaultBufferSize) {} + : BufferedFileReader{std::move(reader_interface), cDefaultBufferSize} {} // Destructor ~BufferedFileReader() override = default; From 810a77b5d03e00de736037da70577fc16d80203b Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:28:36 -0400 Subject: [PATCH 50/62] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- components/core/src/clp/BufferedFileReader.cpp | 1 - components/core/src/clp/BufferedFileReader.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index a9662cad1..8e6a846ca 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -13,7 +13,6 @@ #include "math_utils.hpp" #include "ReaderInterface.hpp" -using std::make_unique; using std::span; using std::string; using std::unique_ptr; diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index f719365c5..dc0295b60 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -8,7 +8,6 @@ #include #include -#include "Array.hpp" #include "BufferReader.hpp" #include "ErrorCode.hpp" #include "ReaderInterface.hpp" From 654eb35edcf3c3d38f26224b18fabd4037c34902 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:17:10 -0400 Subject: [PATCH 51/62] update unit-test --- .../core/tests/test-BufferedFileReader.cpp | 62 ++++++------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 8b8c63e09..8796c7aa6 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -277,59 +277,37 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_writer.write(test_data.data(), test_data_size); file_writer.close(); - BufferedFileReader reader{make_unique(test_file_path)}; + size_t const reader_begin_offset = GENERATE( + 0, + 127 + ); + + // Instantiate BufferedFileReader and the reference FileReader from a non-zero pos + auto fd_reader = make_unique(test_file_path); + fd_reader->seek_from_begin(reader_begin_offset); + BufferedFileReader buffered_file_reader{std::move(fd_reader)}; string test_string; FileReader ref_file_reader{test_file_path}; + ref_file_reader.seek_from_begin(reader_begin_offset); string ref_string; + // Validate a clearing a checkpoint without any reading wouldn't change the beginning offset + buffered_file_reader.clear_checkpoint(); + REQUIRE(reader_begin_offset == buffered_file_reader.get_pos()); + // Validate that a FileReader and a BufferedFileReader return the same strings (split by // delimiters) ErrorCode error_code{ErrorCode_Success}; auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); while (ErrorCode_EndOfFile != error_code) { error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); - auto error_code2 = reader.try_read_to_delimiter(delimiter, true, false, test_string); - REQUIRE(error_code2 == error_code); - REQUIRE(test_string == ref_string); - } - - boost::filesystem::remove(test_file_path); -} - -TEST_CASE("Test reading with beginning offset", "[BufferedFileReader]") { - // Initialize data for testing - size_t const test_data_size = 1L * 1024 * 1024 + 1; // 1MB - size_t const cReaderBeginOffset = 6; - auto test_data_uniq_ptr = make_unique>(); - auto& test_data = *test_data_uniq_ptr; - for (size_t i = 0; i < test_data.size(); ++i) { - test_data[i] = static_cast('a' + (std::rand() % (cNumAlphabets))); - } - - // Write to test file - string const test_file_path{"BufferedFileReader.offset.test"}; - FileWriter file_writer; - file_writer.open(test_file_path, FileWriter::OpenMode::CREATE_FOR_WRITING); - file_writer.write(test_data.data(), test_data_size); - file_writer.close(); - - // Instantiate BufferedFileReader and the reference FileReader from a non-zero pos - auto fd_reader = make_unique(test_file_path); - fd_reader->seek_from_begin(cReaderBeginOffset); - BufferedFileReader reader{std::move(fd_reader)}; - - FileReader ref_file_reader{test_file_path}; - ref_file_reader.seek_from_begin(cReaderBeginOffset); - - ErrorCode error_code{ErrorCode_Success}; - auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); - string ref_string; - string test_string; - // Validate that a FileReader and a BufferedFileReader return the same strings. - while (ErrorCode_EndOfFile != error_code) { - error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); - auto error_code2 = reader.try_read_to_delimiter(delimiter, true, false, test_string); + auto const error_code2 = buffered_file_reader.try_read_to_delimiter( + delimiter, + true, + false, + test_string + ); REQUIRE(error_code2 == error_code); REQUIRE(test_string == ref_string); } From c2b63333f0111e005dbfc61e7bbc990fb4158e84 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:33:37 -0400 Subject: [PATCH 52/62] linter --- components/core/tests/test-BufferedFileReader.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 8796c7aa6..418360b1a 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -277,10 +277,7 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { file_writer.write(test_data.data(), test_data_size); file_writer.close(); - size_t const reader_begin_offset = GENERATE( - 0, - 127 - ); + size_t const reader_begin_offset = GENERATE(0, 127); // Instantiate BufferedFileReader and the reference FileReader from a non-zero pos auto fd_reader = make_unique(test_file_path); @@ -302,12 +299,8 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); while (ErrorCode_EndOfFile != error_code) { error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); - auto const error_code2 = buffered_file_reader.try_read_to_delimiter( - delimiter, - true, - false, - test_string - ); + auto const error_code2 + = buffered_file_reader.try_read_to_delimiter(delimiter, true, false, test_string); REQUIRE(error_code2 == error_code); REQUIRE(test_string == ref_string); } From e4bbd3001ff5e4bb9a8b1b2ed57d2ceb8effa819 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 20:23:21 -0400 Subject: [PATCH 53/62] Code review suggestions --- .../core/src/clp/BufferedFileReader.cpp | 18 +---------------- .../core/src/clp/BufferedFileReader.hpp | 20 ++----------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 8e6a846ca..1dad268e4 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -75,24 +75,8 @@ auto BufferedFileReader::try_refill_buffer_if_empty() -> ErrorCode { return refill_reader_buffer(m_base_buffer_size); } -void BufferedFileReader::refill_buffer_if_empty() { - auto error_code = try_refill_buffer_if_empty(); - if (ErrorCode_Success != error_code) { - throw OperationFailed(error_code, __FILENAME__, __LINE__); - } -} - -auto BufferedFileReader::try_peek_buffered_data(char const*& buf, size_t& peek_size) const - -> ErrorCode { - m_buffer_reader.peek_buffer(buf, peek_size); - return ErrorCode_Success; -} - void BufferedFileReader::peek_buffered_data(char const*& buf, size_t& peek_size) const { - auto error_code = try_peek_buffered_data(buf, peek_size); - if (ErrorCode_Success != error_code) { - throw OperationFailed(error_code, __FILENAME__, __LINE__); - } + m_buffer_reader.peek_buffer(buf, peek_size); } auto BufferedFileReader::set_checkpoint() -> size_t { diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 2cdb1510d..dc041d9c1 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -95,28 +95,11 @@ class BufferedFileReader : public ReaderInterface { // Methods /** * Tries to fill the internal buffer if it's empty - * @return ErrorCode_errno on error reading from the underlying file - * @return ErrorCode_EndOfFile on EOF + * @return Same as refill_reader_buffer if it fails * @return ErrorCode_Success on success */ [[nodiscard]] auto try_refill_buffer_if_empty() -> ErrorCode; - /** - * Fills the internal buffer if it's empty - */ - void refill_buffer_if_empty(); - - /** - * Tries to peek the remaining buffered content without advancing the read head. - * - * NOTE: Any subsequent read or seek operations may invalidate the returned buffer. - * @param buf Returns a pointer to the remaining content in the buffer - * @param peek_size Returns the size of the remaining content in the buffer - * @return ErrorCode_Success on success - */ - [[nodiscard]] auto - try_peek_buffered_data(char const*& buf, size_t& peek_size) const -> ErrorCode; - /** * Peeks the remaining buffered content without advancing the read head. * @@ -173,6 +156,7 @@ class BufferedFileReader : public ReaderInterface { * @return ErrorCode_BadParam if buf is null * @return ErrorCode_errno on error reading from the underlying file * @return ErrorCode_EndOfFile on EOF + * @return Same as BufferReader::try_read if it fails * @return ErrorCode_Success on success */ [[nodiscard]] auto From 7fc448af1fcf5b7ef20e1eb6c27b6ecedfdcea00 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:45:35 -0400 Subject: [PATCH 54/62] Apply suggestions from code review Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- components/core/src/clp/BufferReader.cpp | 5 +++++ components/core/src/clp/BufferedFileReader.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/components/core/src/clp/BufferReader.cpp b/components/core/src/clp/BufferReader.cpp index 0dfe58c9d..d2efa4e59 100644 --- a/components/core/src/clp/BufferReader.cpp +++ b/components/core/src/clp/BufferReader.cpp @@ -11,6 +11,11 @@ BufferReader::BufferReader(char const* data, size_t data_size, size_t pos) if (nullptr == data && (data_size != 0 || pos != 0)) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } + else if (pos > data_size) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } } auto BufferReader::peek_buffer(char const*& buf, size_t& peek_size) const -> void { diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 1dad268e4..9e2d83738 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -65,7 +65,7 @@ BufferedFileReader::BufferedFileReader( m_file_pos = m_file_reader->get_pos(); m_buffer_begin_pos = 0; m_buffer_reader = BufferReader{m_buffer.data(), 0}; - m_highest_read_pos = 0; + m_highest_read_pos = m_file_pos; } auto BufferedFileReader::try_refill_buffer_if_empty() -> ErrorCode { From 4e5d3ffd980438dba6ddc89e34d3efebf54c7696 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:53:26 -0400 Subject: [PATCH 55/62] update code based on comments --- components/core/src/clp/BufferedFileReader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 9e2d83738..329c45394 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -56,6 +56,13 @@ BufferedFileReader::BufferedFileReader( size_t base_buffer_size ) : m_file_reader(std::move(reader_interface)) { + if (nullptr == m_file_reader) { + throw OperationFailed( + ErrorCode_BadParam, + __FILENAME__, + __LINE__, + "reader_interface cannot be null"); + } if (base_buffer_size % cMinBufferSize != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } @@ -278,11 +285,11 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err } auto BufferedFileReader::drop_content_before_current_pos() -> void { - auto buffer_reader_pos = m_buffer_reader.get_pos(); + auto const buffer_reader_pos = m_buffer_reader.get_pos(); auto const new_data_size = m_buffer_reader.get_buffer_size() - buffer_reader_pos; auto const new_buffer_size = int_round_up_to_multiple(new_data_size, m_base_buffer_size); - m_buffer.erase(m_buffer.begin(), m_buffer.begin() + static_cast(buffer_reader_pos)); + m_buffer.erase(m_buffer.begin(), m_buffer.begin() + buffer_reader_pos); m_buffer.resize(new_buffer_size); m_buffer_begin_pos += buffer_reader_pos; From 676dc3c4cb4a9982735eb6aa9f60a1d7b3d89923 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:55:11 -0400 Subject: [PATCH 56/62] fix --- components/core/src/clp/BufferReader.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/components/core/src/clp/BufferReader.cpp b/components/core/src/clp/BufferReader.cpp index d2efa4e59..cc27c0601 100644 --- a/components/core/src/clp/BufferReader.cpp +++ b/components/core/src/clp/BufferReader.cpp @@ -11,9 +11,7 @@ BufferReader::BufferReader(char const* data, size_t data_size, size_t pos) if (nullptr == data && (data_size != 0 || pos != 0)) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - else if (pos > data_size) { - throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); - } + if (pos > data_size) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } } From dfb1211b302f02bd7a425c9188a2c196c5f7f179 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:00:24 -0400 Subject: [PATCH 57/62] Update unit test --- components/core/tests/test-BufferedFileReader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 418360b1a..3d1b1249a 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -262,12 +262,19 @@ TEST_CASE("Test reading data", "[BufferedFileReader]") { } TEST_CASE("Test delimiter", "[BufferedFileReader]") { + // Initialize random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> uniformly_distributed_alphabet('a', 'a' + cNumAlphabets - 1); + // Initialize data for testing size_t const test_data_size = 1L * 1024 * 1024 + 1; // 1MB auto test_data_uniq_ptr = make_unique>(); auto& test_data = *test_data_uniq_ptr; + + for (size_t i = 0; i < test_data.size(); ++i) { - test_data[i] = static_cast('a' + (std::rand() % (cNumAlphabets))); + test_data[i] = static_cast(uniformly_distributed_alphabet(gen)); } // Write to test file @@ -296,7 +303,7 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { // Validate that a FileReader and a BufferedFileReader return the same strings (split by // delimiters) ErrorCode error_code{ErrorCode_Success}; - auto delimiter = (char)('a' + (std::rand() % (cNumAlphabets))); + auto delimiter = static_cast(uniformly_distributed_alphabet(gen)); while (ErrorCode_EndOfFile != error_code) { error_code = ref_file_reader.try_read_to_delimiter(delimiter, true, false, ref_string); auto const error_code2 From 38076be319e85cde8d3c5fd1bec9d15649df3157 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:00:50 -0400 Subject: [PATCH 58/62] linter --- components/core/src/clp/BufferedFileReader.cpp | 9 +++++---- components/core/tests/test-BufferedFileReader.cpp | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 329c45394..53d06b288 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -58,10 +58,11 @@ BufferedFileReader::BufferedFileReader( : m_file_reader(std::move(reader_interface)) { if (nullptr == m_file_reader) { throw OperationFailed( - ErrorCode_BadParam, - __FILENAME__, - __LINE__, - "reader_interface cannot be null"); + ErrorCode_BadParam, + __FILENAME__, + __LINE__, + "reader_interface cannot be null" + ); } if (base_buffer_size % cMinBufferSize != 0) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); diff --git a/components/core/tests/test-BufferedFileReader.cpp b/components/core/tests/test-BufferedFileReader.cpp index 3d1b1249a..2e77790bc 100644 --- a/components/core/tests/test-BufferedFileReader.cpp +++ b/components/core/tests/test-BufferedFileReader.cpp @@ -272,7 +272,6 @@ TEST_CASE("Test delimiter", "[BufferedFileReader]") { auto test_data_uniq_ptr = make_unique>(); auto& test_data = *test_data_uniq_ptr; - for (size_t i = 0; i < test_data.size(); ++i) { test_data[i] = static_cast(uniformly_distributed_alphabet(gen)); } From a4eb75f23f53ceefe6ac4bd657ca1967d83c351f Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:30:49 -0400 Subject: [PATCH 59/62] Remove unnecessary function --- .../core/src/clp/BufferedFileReader.cpp | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index 53d06b288..ec07403bc 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -18,39 +18,6 @@ using std::string; using std::unique_ptr; namespace clp { -namespace { -/** - * Reads from the given file descriptor - * @param fd - * @param buf - * @param num_bytes_to_read - * @param num_bytes_read - * @return ErrorCode_errno on error - * @return ErrorCode_EndOfFile on EOF - * @return ErrorCode_Success on success - */ -auto read_into_buffer( - ReaderInterface* reader, - char* buf, - size_t num_bytes_to_read, - size_t& num_bytes_read -) -> ErrorCode; - -auto read_into_buffer( - ReaderInterface* reader, - char* buf, - size_t num_bytes_to_read, - size_t& num_bytes_read -) -> ErrorCode { - num_bytes_read = 0; - auto const error_code = reader->try_read(buf, num_bytes_to_read, num_bytes_read); - if (0 == num_bytes_read) { - return ErrorCode_EndOfFile; - } - return error_code; -} -} // namespace - BufferedFileReader::BufferedFileReader( std::unique_ptr reader_interface, size_t base_buffer_size @@ -269,8 +236,7 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err } size_t num_bytes_read{0}; - auto error_code = read_into_buffer( - m_file_reader.get(), + auto const error_code = m_file_reader->try_read( &m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read From 17e8cdbffbb3e8a4be0aaac8882b46f33adaef89 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:33:31 -0400 Subject: [PATCH 60/62] Missing docstrings --- components/core/src/clp/clp/FileCompressor.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/components/core/src/clp/clp/FileCompressor.hpp b/components/core/src/clp/clp/FileCompressor.hpp index ac5a075d3..b1b286492 100644 --- a/components/core/src/clp/clp/FileCompressor.hpp +++ b/components/core/src/clp/clp/FileCompressor.hpp @@ -103,6 +103,7 @@ class FileCompressor { * @param target_encoded_file_size * @param file_to_compress * @param archive_writer + * @param file_reader * @param use_heuristic * @return true if all files were compressed successfully, false otherwise */ From 8cc9e89c860d23f174889b17e512e4f7bc042b47 Mon Sep 17 00:00:00 2001 From: Haiqi Xu <14502009+haiqi96@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:40:08 -0400 Subject: [PATCH 61/62] Other docstrings fixes --- components/core/src/clp/BufferedFileReader.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index dc041d9c1..40659f92d 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -142,8 +142,8 @@ class BufferedFileReader : public ReaderInterface { * head's position. * @return ErrorCode_Truncated if we reached the end of the file before we reached the given * position - * @return ErrorCode_errno on error reading from the underlying file * @return Same as BufferReader::try_seek_from_begin if it fails + * @return Same as ReaderInterface::try_seek_from_begin if it fails * @return ErrorCode_Success on success */ [[nodiscard]] auto try_seek_from_begin(size_t pos) -> ErrorCode override; @@ -154,7 +154,6 @@ class BufferedFileReader : public ReaderInterface { * @param num_bytes_to_read The number of bytes to try and read * @param num_bytes_read The actual number of bytes read * @return ErrorCode_BadParam if buf is null - * @return ErrorCode_errno on error reading from the underlying file * @return ErrorCode_EndOfFile on EOF * @return Same as BufferReader::try_read if it fails * @return ErrorCode_Success on success @@ -169,8 +168,8 @@ class BufferedFileReader : public ReaderInterface { * @param append Whether to append to the given string or replace its contents * @param str Returns the content read * @return ErrorCode_EndOfFile on EOF - * @return ErrorCode_errno on error reading from the underlying file * @return Same as BufferReader::try_read_to_delimiter if it fails + * @return Same as refill_reader_buffer if it fails * @return ErrorCode_Success on success */ [[nodiscard]] auto try_read_to_delimiter( @@ -187,8 +186,8 @@ class BufferedFileReader : public ReaderInterface { * * NOTE: Callers must ensure the current buffer has been exhausted before calling this method * (i.e., the read head is at the end of the buffer). - * @param refill_size - * @return Same as read_into_buffer + * @param num_bytes_to_refill + * @return Same as ReaderInterface::try_read */ [[nodiscard]] auto refill_reader_buffer(size_t num_bytes_to_refill) -> ErrorCode; @@ -199,7 +198,7 @@ class BufferedFileReader : public ReaderInterface { /** * @param file_pos - * @return \p file_pos relative to the beginning of the buffer + * @return file_pos relative to the beginning of the buffer */ [[nodiscard]] auto get_buffer_relative_pos(size_t file_pos) const -> size_t { return file_pos - m_buffer_begin_pos; From 3d843ec5119121f5b5d7a0e2a01ec25a81da2335 Mon Sep 17 00:00:00 2001 From: haiqi96 <14502009+haiqi96@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:51:18 -0400 Subject: [PATCH 62/62] variable name refactor --- .../core/src/clp/BufferedFileReader.cpp | 41 +++++++++---------- .../core/src/clp/BufferedFileReader.hpp | 6 +-- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/components/core/src/clp/BufferedFileReader.cpp b/components/core/src/clp/BufferedFileReader.cpp index ec07403bc..7f5f8026b 100644 --- a/components/core/src/clp/BufferedFileReader.cpp +++ b/components/core/src/clp/BufferedFileReader.cpp @@ -22,8 +22,8 @@ BufferedFileReader::BufferedFileReader( std::unique_ptr reader_interface, size_t base_buffer_size ) - : m_file_reader(std::move(reader_interface)) { - if (nullptr == m_file_reader) { + : m_reader(std::move(reader_interface)) { + if (nullptr == m_reader) { throw OperationFailed( ErrorCode_BadParam, __FILENAME__, @@ -37,10 +37,10 @@ BufferedFileReader::BufferedFileReader( m_base_buffer_size = base_buffer_size; m_buffer.resize(m_base_buffer_size); - m_file_pos = m_file_reader->get_pos(); + m_pos = m_reader->get_pos(); m_buffer_begin_pos = 0; m_buffer_reader = BufferReader{m_buffer.data(), 0}; - m_highest_read_pos = m_file_pos; + m_highest_read_pos = m_pos; } auto BufferedFileReader::try_refill_buffer_if_empty() -> ErrorCode { @@ -55,13 +55,13 @@ void BufferedFileReader::peek_buffered_data(char const*& buf, size_t& peek_size) } auto BufferedFileReader::set_checkpoint() -> size_t { - if (m_checkpoint_pos.has_value() && m_checkpoint_pos < m_file_pos + if (m_checkpoint_pos.has_value() && m_checkpoint_pos < m_pos && m_buffer_reader.get_buffer_size() != m_base_buffer_size) { drop_content_before_current_pos(); } - m_checkpoint_pos = m_file_pos; - return m_file_pos; + m_checkpoint_pos = m_pos; + return m_pos; } auto BufferedFileReader::clear_checkpoint() -> void { @@ -79,16 +79,16 @@ auto BufferedFileReader::clear_checkpoint() -> void { } auto BufferedFileReader::try_get_pos(size_t& pos) -> ErrorCode { - pos = m_file_pos; + pos = m_pos; return ErrorCode_Success; } auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { - if (pos == m_file_pos) { + if (pos == m_pos) { return ErrorCode_Success; } - auto seek_lower_bound = m_checkpoint_pos.has_value() ? m_checkpoint_pos.value() : m_file_pos; + auto seek_lower_bound = m_checkpoint_pos.has_value() ? m_checkpoint_pos.value() : m_pos; if (pos < seek_lower_bound) { return ErrorCode_Unsupported; } @@ -98,7 +98,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { if (false == m_checkpoint_pos.has_value()) { // If checkpoint is not set, simply move the file_pos and invalidate // the buffer reader - if (auto const error_code = m_file_reader->try_seek_from_begin(pos); + if (auto const error_code = m_reader->try_seek_from_begin(pos); error_code != ErrorCode_Success) { return error_code; @@ -122,7 +122,7 @@ auto BufferedFileReader::try_seek_from_begin(size_t pos) -> ErrorCode { } else if (ErrorCode_Success != error_code) { return error_code; } - update_file_pos(pos); + update_pos(pos); return ErrorCode_Success; } @@ -143,7 +143,7 @@ auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& n if (ErrorCode_Success == error_code) { dst_view = dst_view.subspan(bytes_read); num_bytes_read += bytes_read; - update_file_pos(m_file_pos + bytes_read); + update_pos(m_pos + bytes_read); } else if (ErrorCode_EndOfFile == error_code) { error_code = refill_reader_buffer(m_base_buffer_size); if (ErrorCode_EndOfFile == error_code) { @@ -186,7 +186,7 @@ auto BufferedFileReader::try_read_to_delimiter( { return ret_code; } - update_file_pos(m_file_pos + num_bytes_read); + update_pos(m_pos + num_bytes_read); total_num_bytes_read += num_bytes_read; if (found_delim) { break; @@ -236,11 +236,8 @@ auto BufferedFileReader::refill_reader_buffer(size_t num_bytes_to_refill) -> Err } size_t num_bytes_read{0}; - auto const error_code = m_file_reader->try_read( - &m_buffer[next_buffer_pos], - num_bytes_to_read, - num_bytes_read - ); + auto const error_code + = m_reader->try_read(&m_buffer[next_buffer_pos], num_bytes_to_read, num_bytes_read); if (error_code != ErrorCode_Success && ErrorCode_EndOfFile != error_code) { return error_code; } @@ -263,8 +260,8 @@ auto BufferedFileReader::drop_content_before_current_pos() -> void { m_buffer_reader = BufferReader{m_buffer.data(), new_data_size}; } -auto BufferedFileReader::update_file_pos(size_t pos) -> void { - m_file_pos = pos; - m_highest_read_pos = std::max(m_file_pos, m_highest_read_pos); +auto BufferedFileReader::update_pos(size_t pos) -> void { + m_pos = pos; + m_highest_read_pos = std::max(m_pos, m_highest_read_pos); } } // namespace clp diff --git a/components/core/src/clp/BufferedFileReader.hpp b/components/core/src/clp/BufferedFileReader.hpp index 40659f92d..f3e50cd1d 100644 --- a/components/core/src/clp/BufferedFileReader.hpp +++ b/components/core/src/clp/BufferedFileReader.hpp @@ -208,14 +208,14 @@ class BufferedFileReader : public ReaderInterface { return m_buffer_begin_pos + m_buffer_reader.get_buffer_size(); } - auto update_file_pos(size_t pos) -> void; + auto update_pos(size_t pos) -> void; // Constants static constexpr size_t cDefaultBufferSize = (16 * cMinBufferSize); // Variables - size_t m_file_pos{0}; - std::unique_ptr m_file_reader; + size_t m_pos{0}; + std::unique_ptr m_reader; // Buffer specific data std::vector m_buffer;