From 60af4551f345f868238895c366f542207ed99717 Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Mon, 2 Dec 2024 14:26:55 -0500 Subject: [PATCH 1/6] IRv2 to archive command line interface --- .../core/src/clp_s/CommandLineArguments.cpp | 150 +++++++++++++++++- .../core/src/clp_s/CommandLineArguments.hpp | 5 +- components/core/src/clp_s/clp-s.cpp | 66 ++++++++ 3 files changed, 219 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index 99539b627..4118fabed 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -106,11 +106,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { std::cerr << " c - compress" << std::endl; std::cerr << " x - decompress" << std::endl; std::cerr << " s - search" << std::endl; + std::cerr << " i - compress IR format" << std::endl; std::cerr << std::endl; std::cerr << "Try " << " c --help OR" << " x --help OR" - << " s --help for command-specific details." << std::endl; + << " s --help OR" + << " i --help for command-specific details." << std::endl; po::options_description visible_options; visible_options.add(general_options); @@ -125,6 +127,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { case (char)Command::Compress: case (char)Command::Extract: case (char)Command::Search: + case (char)Command::IrCompress: m_command = (Command)command_input; break; default: @@ -696,6 +699,147 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "The --count-by-time and --count options are mutually exclusive." ); } + } else if (Command::IrCompress == m_command) { + po::options_description compression_positional_options; + // clang-format off + compression_positional_options.add_options()( + "archives-dir", + po::value(&m_archives_dir)->value_name("DIR"), + "output directory" + )( + "input-paths", + po::value>(&m_file_paths)->value_name("PATHS"), + "input paths" + ); + // clang-format on + + po::options_description compression_options("Compression options"); + std::string metadata_db_config_file_path; + std::string input_path_list_file_path; + // clang-format off + compression_options.add_options()( + "compression-level", + po::value(&m_compression_level)->value_name("LEVEL")-> + default_value(m_compression_level), + "1 (fast/low compression) to 9 (slow/high compression)." + )( + "target-encoded-size", + po::value(&m_target_encoded_size)->value_name("TARGET_ENCODED_SIZE")-> + default_value(m_target_encoded_size), + "Target size (B) for the dictionaries and encoded messages before a new " + "archive is created." + )( + "min-table-size", + po::value(&m_minimum_table_size)->value_name("MIN_TABLE_SIZE")-> + default_value(m_minimum_table_size), + "Minimum size (B) for a packed table before it gets compressed." + )( + "max-document-size", + po::value(&m_max_document_size)->value_name("DOC_SIZE")-> + default_value(m_max_document_size), + "Maximum allowed size (B) for a single document before compression fails." + )( + "timestamp-key", + po::value(&m_timestamp_key)->value_name("TIMESTAMP_COLUMN_KEY")-> + default_value(m_timestamp_key), + "Path (e.g. x.y) for the field containing the log event's timestamp." + )( + "db-config-file", + po::value(&metadata_db_config_file_path)->value_name("FILE")-> + default_value(metadata_db_config_file_path), + "Global metadata DB YAML config" + )( + "files-from,f", + po::value(&input_path_list_file_path) + ->value_name("FILE") + ->default_value(input_path_list_file_path), + "Compress files specified in FILE" + )( + "print-archive-stats", + po::bool_switch(&m_print_archive_stats), + "Print statistics (json) about the archive after it's compressed." + )( + "single-file-archive", + po::bool_switch(&m_single_file_archive), + "Create a single archive file instead of multiple files." + )( + "disable-log-order", + po::bool_switch(&m_disable_log_order), + "Do not record log order at ingestion time." + ); + // clang-format on + + po::positional_options_description positional_options; + positional_options.add("archives-dir", 1); + positional_options.add("input-paths", -1); + + po::options_description all_compression_options; + all_compression_options.add(compression_options); + all_compression_options.add(compression_positional_options); + + std::vector unrecognized_options + = po::collect_unrecognized(parsed.options, po::include_positional); + unrecognized_options.erase(unrecognized_options.begin()); + po::store( + po::command_line_parser(unrecognized_options) + .options(all_compression_options) + .positional(positional_options) + .run(), + parsed_command_line_options + ); + po::notify(parsed_command_line_options); + + if (parsed_command_line_options.count("help")) { + print_ir_compression_usage(); + + std::cerr << "Examples:\n"; + std::cerr << " # Compress file1.ir and dir1 into archives-dir\n"; + std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1\n"; + + po::options_description visible_options; + visible_options.add(general_options); + visible_options.add(compression_options); + std::cerr << visible_options << '\n'; + return ParsingResult::InfoCommand; + } + + if (m_archives_dir.empty()) { + throw std::invalid_argument("No archives directory specified."); + } + + if (false == input_path_list_file_path.empty()) { + if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) { + SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path); + return ParsingResult::Failure; + } + } + + if (m_file_paths.empty()) { + throw std::invalid_argument("No input paths specified."); + } + + // Parse and validate global metadata DB config + if (false == metadata_db_config_file_path.empty()) { + clp::GlobalMetadataDBConfig metadata_db_config; + try { + metadata_db_config.parse_config_file(metadata_db_config_file_path); + } catch (std::exception& e) { + SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what()); + return ParsingResult::Failure; + } + + if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL + != metadata_db_config.get_metadata_db_type()) + { + SPDLOG_ERROR( + "Invalid metadata database type for {}; only supported type is MySQL.", + m_program_name + ); + return ParsingResult::Failure; + } + + m_metadata_db_config = std::move(metadata_db_config); + } } } catch (std::exception& e) { SPDLOG_ERROR("{}", e.what()); @@ -809,4 +953,8 @@ void CommandLineArguments::print_search_usage() const { " [OUTPUT_HANDLER [OUTPUT_HANDLER_OPTIONS]]" << std::endl; } + +void CommandLineArguments::print_ir_compression_usage() const { + std::cerr << "Usage: " << m_program_name << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]\n"; +} } // namespace clp_s diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp index a87e9b6bd..679ba7260 100644 --- a/components/core/src/clp_s/CommandLineArguments.hpp +++ b/components/core/src/clp_s/CommandLineArguments.hpp @@ -26,7 +26,8 @@ class CommandLineArguments { enum class Command : char { Compress = 'c', Extract = 'x', - Search = 's' + Search = 's', + IrCompress = 'i' }; enum class OutputHandlerType : uint8_t { @@ -163,6 +164,8 @@ class CommandLineArguments { void print_decompression_usage() const; + void print_ir_compression_usage() const; + void print_search_usage() const; // Variables diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index b76683caf..b566fd570 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -50,6 +50,13 @@ namespace { */ bool compress(CommandLineArguments const& command_line_arguments); +/** + * Compresses the input IR files specified by the command line arguments into an archive. + * @param command_line_arguments + * @return Whether compression was successful + */ +auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool; + /** * Decompresses the archive specified by the given JsonConstructorOption. * @param json_constructor_option @@ -121,6 +128,61 @@ bool compress(CommandLineArguments const& command_line_arguments) { return true; } +auto setup_compression_options( + CommandLineArguments const& command_line_arguments, + clp_s::JsonParserOption& option +) -> bool { + auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir()); + // Create output directory in case it doesn't exist + try { + std::filesystem::create_directory(archives_dir.string()); + } catch (std::exception& e) { + SPDLOG_ERROR( + "Failed to create archives directory {} - {}", + archives_dir.string(), + e.what() + ); + return false; + } + option.file_paths = command_line_arguments.get_file_paths(); + option.archives_dir = archives_dir.string(); + option.target_encoded_size = command_line_arguments.get_target_encoded_size(); + option.max_document_size = command_line_arguments.get_max_document_size(); + option.min_table_size = command_line_arguments.get_minimum_table_size(); + option.compression_level = command_line_arguments.get_compression_level(); + option.timestamp_key = command_line_arguments.get_timestamp_key(); + option.print_archive_stats = command_line_arguments.print_archive_stats(); + option.single_file_archive = command_line_arguments.get_single_file_archive(); + option.record_log_order = command_line_arguments.get_record_log_order(); + + auto const& db_config_container = command_line_arguments.get_metadata_db_config(); + if (db_config_container.has_value()) { + auto const& db_config = db_config_container.value(); + option.metadata_db = std::make_shared( + db_config.get_metadata_db_host(), + db_config.get_metadata_db_port(), + db_config.get_metadata_db_username(), + db_config.get_metadata_db_password(), + db_config.get_metadata_db_name(), + db_config.get_metadata_table_prefix() + ); + } + return true; +} + +auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool { + clp_s::JsonParserOption option{}; + if (false == setup_compression_options(command_line_arguments, option)) { + return false; + } + + // Functionality Coming in later PR + // -->Instantiate Json Parser + // -->Call new parsing function in Json Parser to parse IRv2 to archive + // -->Store Archive + return true; +} + void decompress_archive(clp_s::JsonConstructorOption const& json_constructor_option) { clp_s::JsonConstructor constructor(json_constructor_option); constructor.store(); @@ -290,6 +352,10 @@ int main(int argc, char const* argv[]) { if (false == compress(command_line_arguments)) { return 1; } + } else if (CommandLineArguments::Command::IrCompress == command_line_arguments.get_command()) { + if (false == ir_compress(command_line_arguments)) { + return 1; + } } else if (CommandLineArguments::Command::Extract == command_line_arguments.get_command()) { auto const& archives_dir = command_line_arguments.get_archives_dir(); if (false == std::filesystem::is_directory(archives_dir)) { From 2734aa03da258239d7e63ef229d12486271a40fd Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Wed, 4 Dec 2024 13:34:20 -0500 Subject: [PATCH 2/6] combine new i command line option into the c command line option --- .../core/src/clp_s/CommandLineArguments.cpp | 154 +----------------- .../core/src/clp_s/CommandLineArguments.hpp | 8 +- components/core/src/clp_s/clp-s.cpp | 81 +++------ 3 files changed, 29 insertions(+), 214 deletions(-) diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index 4118fabed..aa070e6ef 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -106,13 +106,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { std::cerr << " c - compress" << std::endl; std::cerr << " x - decompress" << std::endl; std::cerr << " s - search" << std::endl; - std::cerr << " i - compress IR format" << std::endl; std::cerr << std::endl; std::cerr << "Try " << " c --help OR" << " x --help OR" - << " s --help OR" - << " i --help for command-specific details." << std::endl; + << " s --help for command-specific details." << std::endl; po::options_description visible_options; visible_options.add(general_options); @@ -127,7 +125,6 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { case (char)Command::Compress: case (char)Command::Extract: case (char)Command::Search: - case (char)Command::IrCompress: m_command = (Command)command_input; break; default: @@ -205,6 +202,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "disable-log-order", po::bool_switch(&m_disable_log_order), "Do not record log order at ingestion time." + )( + "file-type", + po::value(&m_file_type)->value_name("FILE_TYPE")-> + default_value(m_file_type), + "The type of file that is to be compressed to archive (e.g Json or IR)" ); // clang-format on @@ -699,147 +701,6 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "The --count-by-time and --count options are mutually exclusive." ); } - } else if (Command::IrCompress == m_command) { - po::options_description compression_positional_options; - // clang-format off - compression_positional_options.add_options()( - "archives-dir", - po::value(&m_archives_dir)->value_name("DIR"), - "output directory" - )( - "input-paths", - po::value>(&m_file_paths)->value_name("PATHS"), - "input paths" - ); - // clang-format on - - po::options_description compression_options("Compression options"); - std::string metadata_db_config_file_path; - std::string input_path_list_file_path; - // clang-format off - compression_options.add_options()( - "compression-level", - po::value(&m_compression_level)->value_name("LEVEL")-> - default_value(m_compression_level), - "1 (fast/low compression) to 9 (slow/high compression)." - )( - "target-encoded-size", - po::value(&m_target_encoded_size)->value_name("TARGET_ENCODED_SIZE")-> - default_value(m_target_encoded_size), - "Target size (B) for the dictionaries and encoded messages before a new " - "archive is created." - )( - "min-table-size", - po::value(&m_minimum_table_size)->value_name("MIN_TABLE_SIZE")-> - default_value(m_minimum_table_size), - "Minimum size (B) for a packed table before it gets compressed." - )( - "max-document-size", - po::value(&m_max_document_size)->value_name("DOC_SIZE")-> - default_value(m_max_document_size), - "Maximum allowed size (B) for a single document before compression fails." - )( - "timestamp-key", - po::value(&m_timestamp_key)->value_name("TIMESTAMP_COLUMN_KEY")-> - default_value(m_timestamp_key), - "Path (e.g. x.y) for the field containing the log event's timestamp." - )( - "db-config-file", - po::value(&metadata_db_config_file_path)->value_name("FILE")-> - default_value(metadata_db_config_file_path), - "Global metadata DB YAML config" - )( - "files-from,f", - po::value(&input_path_list_file_path) - ->value_name("FILE") - ->default_value(input_path_list_file_path), - "Compress files specified in FILE" - )( - "print-archive-stats", - po::bool_switch(&m_print_archive_stats), - "Print statistics (json) about the archive after it's compressed." - )( - "single-file-archive", - po::bool_switch(&m_single_file_archive), - "Create a single archive file instead of multiple files." - )( - "disable-log-order", - po::bool_switch(&m_disable_log_order), - "Do not record log order at ingestion time." - ); - // clang-format on - - po::positional_options_description positional_options; - positional_options.add("archives-dir", 1); - positional_options.add("input-paths", -1); - - po::options_description all_compression_options; - all_compression_options.add(compression_options); - all_compression_options.add(compression_positional_options); - - std::vector unrecognized_options - = po::collect_unrecognized(parsed.options, po::include_positional); - unrecognized_options.erase(unrecognized_options.begin()); - po::store( - po::command_line_parser(unrecognized_options) - .options(all_compression_options) - .positional(positional_options) - .run(), - parsed_command_line_options - ); - po::notify(parsed_command_line_options); - - if (parsed_command_line_options.count("help")) { - print_ir_compression_usage(); - - std::cerr << "Examples:\n"; - std::cerr << " # Compress file1.ir and dir1 into archives-dir\n"; - std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1\n"; - - po::options_description visible_options; - visible_options.add(general_options); - visible_options.add(compression_options); - std::cerr << visible_options << '\n'; - return ParsingResult::InfoCommand; - } - - if (m_archives_dir.empty()) { - throw std::invalid_argument("No archives directory specified."); - } - - if (false == input_path_list_file_path.empty()) { - if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) { - SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path); - return ParsingResult::Failure; - } - } - - if (m_file_paths.empty()) { - throw std::invalid_argument("No input paths specified."); - } - - // Parse and validate global metadata DB config - if (false == metadata_db_config_file_path.empty()) { - clp::GlobalMetadataDBConfig metadata_db_config; - try { - metadata_db_config.parse_config_file(metadata_db_config_file_path); - } catch (std::exception& e) { - SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what()); - return ParsingResult::Failure; - } - - if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL - != metadata_db_config.get_metadata_db_type()) - { - SPDLOG_ERROR( - "Invalid metadata database type for {}; only supported type is MySQL.", - m_program_name - ); - return ParsingResult::Failure; - } - - m_metadata_db_config = std::move(metadata_db_config); - } } } catch (std::exception& e) { SPDLOG_ERROR("{}", e.what()); @@ -954,7 +815,4 @@ void CommandLineArguments::print_search_usage() const { << std::endl; } -void CommandLineArguments::print_ir_compression_usage() const { - std::cerr << "Usage: " << m_program_name << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]\n"; -} } // namespace clp_s diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp index 679ba7260..d8bc25f84 100644 --- a/components/core/src/clp_s/CommandLineArguments.hpp +++ b/components/core/src/clp_s/CommandLineArguments.hpp @@ -26,8 +26,7 @@ class CommandLineArguments { enum class Command : char { Compress = 'c', Extract = 'x', - Search = 's', - IrCompress = 'i' + Search = 's' }; enum class OutputHandlerType : uint8_t { @@ -117,6 +116,8 @@ class CommandLineArguments { bool get_record_log_order() const { return false == m_disable_log_order; } + [[nodiscard]] auto get_file_type() const -> std::string { return m_file_type; } + private: // Methods /** @@ -164,8 +165,6 @@ class CommandLineArguments { void print_decompression_usage() const; - void print_ir_compression_usage() const; - void print_search_usage() const; // Variables @@ -187,6 +186,7 @@ class CommandLineArguments { size_t m_target_ordered_chunk_size{}; size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB bool m_disable_log_order{false}; + std::string m_file_type{"Json"}; // Metadata db variables std::optional m_metadata_db_config; diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index b566fd570..87bdb797c 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -79,6 +79,16 @@ bool search_archive( ); bool compress(CommandLineArguments const& command_line_arguments) { + auto file_type = command_line_arguments.get_file_type(); + if ("IR" != file_type && "Json" != file_type) { + SPDLOG_ERROR("File Type specified is Invalid"); + return false; + } + if ("IR" == file_type && command_line_arguments.get_structurize_arrays()) { + SPDLOG_ERROR("ERROR: structurized arrays are not supported for IR files"); + return false; + } + auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir()); // Create output directory in case it doesn't exist @@ -120,69 +130,20 @@ bool compress(CommandLineArguments const& command_line_arguments) { } clp_s::JsonParser parser(option); - if (false == parser.parse()) { - SPDLOG_ERROR("Encountered error while parsing input"); - return false; + if ("IR" == file_type) { + // Functionality Coming in later PR + // -->Call new parsing function in Json Parser to parse IRv2 to archive + // -->Check for error from parsing function + } else { + if (false == parser.parse()) { + SPDLOG_ERROR("Encountered error while parsing input"); + return false; + } } parser.store(); return true; } -auto setup_compression_options( - CommandLineArguments const& command_line_arguments, - clp_s::JsonParserOption& option -) -> bool { - auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir()); - // Create output directory in case it doesn't exist - try { - std::filesystem::create_directory(archives_dir.string()); - } catch (std::exception& e) { - SPDLOG_ERROR( - "Failed to create archives directory {} - {}", - archives_dir.string(), - e.what() - ); - return false; - } - option.file_paths = command_line_arguments.get_file_paths(); - option.archives_dir = archives_dir.string(); - option.target_encoded_size = command_line_arguments.get_target_encoded_size(); - option.max_document_size = command_line_arguments.get_max_document_size(); - option.min_table_size = command_line_arguments.get_minimum_table_size(); - option.compression_level = command_line_arguments.get_compression_level(); - option.timestamp_key = command_line_arguments.get_timestamp_key(); - option.print_archive_stats = command_line_arguments.print_archive_stats(); - option.single_file_archive = command_line_arguments.get_single_file_archive(); - option.record_log_order = command_line_arguments.get_record_log_order(); - - auto const& db_config_container = command_line_arguments.get_metadata_db_config(); - if (db_config_container.has_value()) { - auto const& db_config = db_config_container.value(); - option.metadata_db = std::make_shared( - db_config.get_metadata_db_host(), - db_config.get_metadata_db_port(), - db_config.get_metadata_db_username(), - db_config.get_metadata_db_password(), - db_config.get_metadata_db_name(), - db_config.get_metadata_table_prefix() - ); - } - return true; -} - -auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool { - clp_s::JsonParserOption option{}; - if (false == setup_compression_options(command_line_arguments, option)) { - return false; - } - - // Functionality Coming in later PR - // -->Instantiate Json Parser - // -->Call new parsing function in Json Parser to parse IRv2 to archive - // -->Store Archive - return true; -} - void decompress_archive(clp_s::JsonConstructorOption const& json_constructor_option) { clp_s::JsonConstructor constructor(json_constructor_option); constructor.store(); @@ -352,10 +313,6 @@ int main(int argc, char const* argv[]) { if (false == compress(command_line_arguments)) { return 1; } - } else if (CommandLineArguments::Command::IrCompress == command_line_arguments.get_command()) { - if (false == ir_compress(command_line_arguments)) { - return 1; - } } else if (CommandLineArguments::Command::Extract == command_line_arguments.get_command()) { auto const& archives_dir = command_line_arguments.get_archives_dir(); if (false == std::filesystem::is_directory(archives_dir)) { From 99091900906956eba0804496c4df3d2d4369c8d4 Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Wed, 4 Dec 2024 13:54:05 -0500 Subject: [PATCH 3/6] remove unused function prototype --- components/core/src/clp_s/clp-s.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index 87bdb797c..dd6d8f265 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -50,13 +50,6 @@ namespace { */ bool compress(CommandLineArguments const& command_line_arguments); -/** - * Compresses the input IR files specified by the command line arguments into an archive. - * @param command_line_arguments - * @return Whether compression was successful - */ -auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool; - /** * Decompresses the archive specified by the given JsonConstructorOption. * @param json_constructor_option From 2f3365ff677c1fdc40f41d089873fdae44fc7a1a Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Wed, 4 Dec 2024 16:51:29 -0500 Subject: [PATCH 4/6] change to use enum for file-type and add file_type to JsonParserOption --- .../core/src/clp_s/CommandLineArguments.cpp | 28 ++++++++++++++++--- .../core/src/clp_s/CommandLineArguments.hpp | 9 ++++-- components/core/src/clp_s/JsonParser.hpp | 2 ++ components/core/src/clp_s/clp-s.cpp | 13 ++------- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index aa070e6ef..f7b46bc97 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -148,6 +148,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { po::options_description compression_options("Compression options"); std::string metadata_db_config_file_path; std::string input_path_list_file_path; + std::string file_type; // clang-format off compression_options.add_options()( "compression-level", @@ -204,9 +205,8 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "Do not record log order at ingestion time." )( "file-type", - po::value(&m_file_type)->value_name("FILE_TYPE")-> - default_value(m_file_type), - "The type of file that is to be compressed to archive (e.g Json or IR)" + po::value(&file_type)->value_name("FILE_TYPE"), + "The type of file that is to be compressed to archive (e.g json or kv-ir)" ); // clang-format on @@ -260,6 +260,27 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { throw std::invalid_argument("No input paths specified."); } + constexpr std::string_view cJsonFileType{"json"}; + constexpr std::string_view cKeyValueIrFileType{"kv-ir"}; + + if (parsed_command_line_options.count("file-type") > 0) { + if (cJsonFileType == file_type) { + m_file_type = FileType::Json; + } else if (cKeyValueIrFileType == file_type) { + m_file_type = FileType::KeyValueIr; + if (m_structurize_arrays) { + SPDLOG_ERROR( + "Invalid combination of arguments; --file-type {} and " + "--structurize-arrays can't be used together", + cKeyValueIrFileType + ); + return ParsingResult::Failure; + } + } else { + throw std::invalid_argument("Unknown FILE_TYPE: " + file_type); + } + } + // Parse and validate global metadata DB config if (false == metadata_db_config_file_path.empty()) { clp::GlobalMetadataDBConfig metadata_db_config; @@ -814,5 +835,4 @@ void CommandLineArguments::print_search_usage() const { " [OUTPUT_HANDLER [OUTPUT_HANDLER_OPTIONS]]" << std::endl; } - } // namespace clp_s diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp index d8bc25f84..47c244646 100644 --- a/components/core/src/clp_s/CommandLineArguments.hpp +++ b/components/core/src/clp_s/CommandLineArguments.hpp @@ -36,6 +36,11 @@ class CommandLineArguments { Stdout, }; + enum class FileType : uint8_t { + Json = 0, + KeyValueIr + }; + // Constructors explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {} @@ -116,7 +121,7 @@ class CommandLineArguments { bool get_record_log_order() const { return false == m_disable_log_order; } - [[nodiscard]] auto get_file_type() const -> std::string { return m_file_type; } + [[nodiscard]] auto get_file_type() const -> FileType { return m_file_type; } private: // Methods @@ -186,7 +191,7 @@ class CommandLineArguments { size_t m_target_ordered_chunk_size{}; size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB bool m_disable_log_order{false}; - std::string m_file_type{"Json"}; + FileType m_file_type{FileType::Json}; // Metadata db variables std::optional m_metadata_db_config; diff --git a/components/core/src/clp_s/JsonParser.hpp b/components/core/src/clp_s/JsonParser.hpp index bfd423c22..c05ab9d60 100644 --- a/components/core/src/clp_s/JsonParser.hpp +++ b/components/core/src/clp_s/JsonParser.hpp @@ -12,6 +12,7 @@ #include "../clp/GlobalMySQLMetadataDB.hpp" #include "ArchiveWriter.hpp" +#include "CommandLineArguments.hpp" #include "DictionaryWriter.hpp" #include "FileReader.hpp" #include "FileWriter.hpp" @@ -29,6 +30,7 @@ using namespace simdjson; namespace clp_s { struct JsonParserOption { std::vector file_paths; + CommandLineArguments::FileType input_file_type{CommandLineArguments::FileType::Json}; std::string timestamp_key; std::string archives_dir; size_t target_encoded_size{}; diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index dd6d8f265..d6d46aaac 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -72,16 +72,6 @@ bool search_archive( ); bool compress(CommandLineArguments const& command_line_arguments) { - auto file_type = command_line_arguments.get_file_type(); - if ("IR" != file_type && "Json" != file_type) { - SPDLOG_ERROR("File Type specified is Invalid"); - return false; - } - if ("IR" == file_type && command_line_arguments.get_structurize_arrays()) { - SPDLOG_ERROR("ERROR: structurized arrays are not supported for IR files"); - return false; - } - auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir()); // Create output directory in case it doesn't exist @@ -98,6 +88,7 @@ bool compress(CommandLineArguments const& command_line_arguments) { clp_s::JsonParserOption option{}; option.file_paths = command_line_arguments.get_file_paths(); + option.input_file_type = command_line_arguments.get_file_type(); option.archives_dir = archives_dir.string(); option.target_encoded_size = command_line_arguments.get_target_encoded_size(); option.max_document_size = command_line_arguments.get_max_document_size(); @@ -123,7 +114,7 @@ bool compress(CommandLineArguments const& command_line_arguments) { } clp_s::JsonParser parser(option); - if ("IR" == file_type) { + if (CommandLineArguments::FileType::KeyValueIr == option.input_file_type) { // Functionality Coming in later PR // -->Call new parsing function in Json Parser to parse IRv2 to archive // -->Check for error from parsing function From 02159d96498181e138f0219ca56b822dbad309b1 Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Thu, 5 Dec 2024 10:24:51 -0500 Subject: [PATCH 5/6] error when hitting compression of kv-ir until functionality is added --- components/core/src/clp_s/CommandLineArguments.cpp | 11 +++++------ components/core/src/clp_s/clp-s.cpp | 2 ++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index f7b46bc97..9527f6c99 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -148,7 +148,9 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { po::options_description compression_options("Compression options"); std::string metadata_db_config_file_path; std::string input_path_list_file_path; - std::string file_type; + constexpr std::string_view cJsonFileType{"json"}; + constexpr std::string_view cKeyValueIrFileType{"kv-ir"}; + std::string file_type{cJsonFileType}; // clang-format off compression_options.add_options()( "compression-level", @@ -205,8 +207,8 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "Do not record log order at ingestion time." )( "file-type", - po::value(&file_type)->value_name("FILE_TYPE"), - "The type of file that is to be compressed to archive (e.g json or kv-ir)" + po::value(&file_type)->value_name("FILE_TYPE")->default_value(file_type), + "The type of file being compressed (json or kv-ir)" ); // clang-format on @@ -260,9 +262,6 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { throw std::invalid_argument("No input paths specified."); } - constexpr std::string_view cJsonFileType{"json"}; - constexpr std::string_view cKeyValueIrFileType{"kv-ir"}; - if (parsed_command_line_options.count("file-type") > 0) { if (cJsonFileType == file_type) { m_file_type = FileType::Json; diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index d6d46aaac..41acdc6ad 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -118,6 +118,8 @@ bool compress(CommandLineArguments const& command_line_arguments) { // Functionality Coming in later PR // -->Call new parsing function in Json Parser to parse IRv2 to archive // -->Check for error from parsing function + SPDLOG_ERROR("Compressing Key Valur IR Files is not yet supported"); + return false; } else { if (false == parser.parse()) { SPDLOG_ERROR("Encountered error while parsing input"); From 1a8e37209f9696161e09fba5dcc403117d65fa3b Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Thu, 5 Dec 2024 12:49:29 -0500 Subject: [PATCH 6/6] fix typo and remove unnecessary conditional --- .../core/src/clp_s/CommandLineArguments.cpp | 28 +++++++++---------- components/core/src/clp_s/clp-s.cpp | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index 9527f6c99..c7fb9487e 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -262,22 +262,20 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { throw std::invalid_argument("No input paths specified."); } - if (parsed_command_line_options.count("file-type") > 0) { - if (cJsonFileType == file_type) { - m_file_type = FileType::Json; - } else if (cKeyValueIrFileType == file_type) { - m_file_type = FileType::KeyValueIr; - if (m_structurize_arrays) { - SPDLOG_ERROR( - "Invalid combination of arguments; --file-type {} and " - "--structurize-arrays can't be used together", - cKeyValueIrFileType - ); - return ParsingResult::Failure; - } - } else { - throw std::invalid_argument("Unknown FILE_TYPE: " + file_type); + if (cJsonFileType == file_type) { + m_file_type = FileType::Json; + } else if (cKeyValueIrFileType == file_type) { + m_file_type = FileType::KeyValueIr; + if (m_structurize_arrays) { + SPDLOG_ERROR( + "Invalid combination of arguments; --file-type {} and " + "--structurize-arrays can't be used together", + cKeyValueIrFileType + ); + return ParsingResult::Failure; } + } else { + throw std::invalid_argument("Unknown FILE_TYPE: " + file_type); } // Parse and validate global metadata DB config diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index 41acdc6ad..2c6639290 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -118,7 +118,7 @@ bool compress(CommandLineArguments const& command_line_arguments) { // Functionality Coming in later PR // -->Call new parsing function in Json Parser to parse IRv2 to archive // -->Check for error from parsing function - SPDLOG_ERROR("Compressing Key Valur IR Files is not yet supported"); + SPDLOG_ERROR("Compressing Key Value IR Files is not yet supported"); return false; } else { if (false == parser.parse()) {