Skip to content

Commit

Permalink
feat(clp-s): Add command line options for stubbed out kv-pair-IR inge…
Browse files Browse the repository at this point in the history
…stion. (y-scope#618)
  • Loading branch information
AVMatthews authored and davidlion committed Dec 19, 2024
1 parent 36892c1 commit 604bd75
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 2 deletions.
23 changes: 23 additions & 0 deletions components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
po::options_description compression_options("Compression options");
std::string metadata_db_config_file_path;
std::string input_path_list_file_path;
// Accepted values for the `--file-type` option.
constexpr std::string_view cJsonFileType{"json"};
constexpr std::string_view cKeyValueIrFileType{"kv-ir"};
// Raw value of the `--file-type` option; starts out as "json", which is also used as the option's
// default value.
std::string file_type{cJsonFileType};
// clang-format off
compression_options.add_options()(
"compression-level",
Expand Down Expand Up @@ -202,6 +205,10 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"disable-log-order",
po::bool_switch(&m_disable_log_order),
"Do not record log order at ingestion time."
)(
"file-type",
po::value<std::string>(&file_type)->value_name("FILE_TYPE")->default_value(file_type),
"The type of file being compressed (json or kv-ir)"
);
// clang-format on

Expand Down Expand Up @@ -255,6 +262,22 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
throw std::invalid_argument("No input paths specified.");
}

// Translate the FILE_TYPE string into the FileType enum and reject option combinations that the
// selected type doesn't allow.
if (cJsonFileType == file_type) {
m_file_type = FileType::Json;
} else if (cKeyValueIrFileType == file_type) {
m_file_type = FileType::KeyValueIr;
// kv-ir input can't be combined with --structurize-arrays: log the conflict and fail parsing.
if (m_structurize_arrays) {
SPDLOG_ERROR(
"Invalid combination of arguments; --file-type {} and "
"--structurize-arrays can't be used together",
cKeyValueIrFileType
);
return ParsingResult::Failure;
}
} else {
// Unrecognized value: reported by throwing rather than by returning a failure result.
// NOTE(review): presumably an enclosing handler catches this exception -- confirm.
throw std::invalid_argument("Unknown FILE_TYPE: " + file_type);
}

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
Expand Down
8 changes: 8 additions & 0 deletions components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ class CommandLineArguments {
Stdout,
};

/**
 * Type of the input files being compressed, as selected by the `--file-type` option.
 */
enum class FileType : uint8_t {
    Json = 0,
    KeyValueIr = 1
};

// Constructors
explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {}

Expand Down Expand Up @@ -116,6 +121,8 @@ class CommandLineArguments {

bool get_record_log_order() const { return false == m_disable_log_order; }

/**
 * @return The type of input file stored in this object (FileType::Json unless changed).
 */
[[nodiscard]] auto get_file_type() const -> FileType {
    return m_file_type;
}

private:
// Methods
/**
Expand Down Expand Up @@ -184,6 +191,7 @@ class CommandLineArguments {
size_t m_target_ordered_chunk_size{};
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
FileType m_file_type{FileType::Json};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
2 changes: 2 additions & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "../clp/GlobalMySQLMetadataDB.hpp"
#include "ArchiveWriter.hpp"
#include "CommandLineArguments.hpp"
#include "DictionaryWriter.hpp"
#include "FileReader.hpp"
#include "FileWriter.hpp"
Expand All @@ -29,6 +30,7 @@ using namespace simdjson;
namespace clp_s {
struct JsonParserOption {
std::vector<std::string> file_paths;
CommandLineArguments::FileType input_file_type{CommandLineArguments::FileType::Json};
std::string timestamp_key;
std::string archives_dir;
size_t target_encoded_size{};
Expand Down
13 changes: 11 additions & 2 deletions components/core/src/clp_s/clp-s.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ bool compress(CommandLineArguments const& command_line_arguments) {

clp_s::JsonParserOption option{};
option.file_paths = command_line_arguments.get_file_paths();
option.input_file_type = command_line_arguments.get_file_type();
option.archives_dir = archives_dir.string();
option.target_encoded_size = command_line_arguments.get_target_encoded_size();
option.max_document_size = command_line_arguments.get_max_document_size();
Expand All @@ -113,9 +114,17 @@ bool compress(CommandLineArguments const& command_line_arguments) {
}

clp_s::JsonParser parser(option);
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
// Dispatch on the input file type. Only the JSON path is implemented; key-value IR input is
// rejected with an error for now.
// NOTE(review): `parser` has already been constructed from `option` at this point; if its
// constructor has side effects, consider checking the file type first -- confirm.
if (CommandLineArguments::FileType::KeyValueIr == option.input_file_type) {
// TODO: Functionality coming in a later PR:
//   - Call the new parsing function in JsonParser to parse IRv2 into the archive.
//   - Check for errors from that parsing function.
SPDLOG_ERROR("Compressing Key Value IR Files is not yet supported");
return false;
} else {
// JSON input: a parse failure is logged and aborts compression.
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
return false;
}
}
parser.store();
return true;
Expand Down

0 comments on commit 604bd75

Please sign in to comment.