From c91f625fbb65a91de7caa8422477255a6547dd97 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sat, 16 Sep 2023 14:53:33 -0400 Subject: [PATCH 01/30] Move parsing methods from ffi to ir namespace. --- components/core/CMakeLists.txt | 5 + components/core/src/ffi/encoding_methods.cpp | 107 ---------------- components/core/src/ffi/encoding_methods.hpp | 63 +-------- components/core/src/ffi/encoding_methods.inc | 12 +- .../src/ffi/ir_stream/decoding_methods.inc | 3 +- .../src/ffi/ir_stream/encoding_methods.cpp | 5 +- .../src/ffi/search/CompositeWildcardToken.cpp | 4 +- .../core/src/ffi/search/query_methods.cpp | 5 +- components/core/src/ir/parsing.cpp | 117 +++++++++++++++++ components/core/src/ir/parsing.hpp | 77 +++++++++++ .../core/tests/test-encoding_methods.cpp | 121 +----------------- .../core/tests/test-ir_encoding_methods.cpp | 6 +- components/core/tests/test-ir_parsing.cpp | 120 +++++++++++++++++ components/core/tests/test-query_methods.cpp | 3 +- 14 files changed, 348 insertions(+), 300 deletions(-) create mode 100644 components/core/src/ir/parsing.cpp create mode 100644 components/core/src/ir/parsing.hpp create mode 100644 components/core/tests/test-ir_parsing.cpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 1fe764b0c..26e7bbb79 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -251,6 +251,8 @@ set(SOURCE_FILES_clp src/ir/LogEvent.hpp src/ir/LogEventDeserializer.cpp src/ir/LogEventDeserializer.hpp + src/ir/parsing.cpp + src/ir/parsing.hpp src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp @@ -789,6 +791,8 @@ set(SOURCE_FILES_unitTest src/ir/LogEvent.hpp src/ir/LogEventDeserializer.cpp src/ir/LogEventDeserializer.hpp + src/ir/parsing.cpp + src/ir/parsing.hpp src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp @@ -890,6 +894,7 @@ set(SOURCE_FILES_unitTest tests/test-encoding_methods.cpp tests/test-Grep.cpp tests/test-ir_encoding_methods.cpp + tests/test-ir_parsing.cpp tests/test-main.cpp tests/test-math_utils.cpp tests/test-ParserWithUserSchema.cpp diff --git a/components/core/src/ffi/encoding_methods.cpp b/components/core/src/ffi/encoding_methods.cpp index 9a7b17d36..43eefb3e6 100644 --- a/components/core/src/ffi/encoding_methods.cpp +++ b/components/core/src/ffi/encoding_methods.cpp @@ -6,113 +6,6 @@ using std::string_view; namespace ffi { -/* - * For performance, we rely on the ASCII ordering of characters to compare - * ranges of characters at a time instead of comparing individual characters - */ -bool is_delim(signed char c) { - return !( - '+' == c || ('-' <= c && c <= '.') || ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') - || '\\' == c || '_' == c || ('a' <= c && c <= 'z') - ); -} - -bool is_variable_placeholder(char c) { - return (enum_to_underlying_type(VariablePlaceholder::Integer) == c) - || (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c) - || (enum_to_underlying_type(VariablePlaceholder::Float) == c); -} - -bool could_be_multi_digit_hex_value(const string_view str) { - if (str.length() < 2) { - return false; - } - - return std::all_of(str.cbegin(), str.cend(), [](char c) { - return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'); - }); -} - -bool is_var(std::string_view value) { - size_t begin_pos = 0; - size_t end_pos = 0; - bool contains_var_placeholder; - if (get_bounds_of_next_var(value, begin_pos, end_pos, contains_var_placeholder)) { - // Ensure the entire value is a variable - return (0 == begin_pos && value.length() == end_pos); - } else { - return false; - } -} - -bool get_bounds_of_next_var( - const string_view str, - size_t& begin_pos, - size_t& end_pos, - bool& contains_var_placeholder -) { - auto const msg_length = str.length(); - if (end_pos >= msg_length) { - return false; - } - - while (true) { - begin_pos = end_pos; - - // Find next non-delimiter - bool char_before_var_was_equals_sign = false; - char previous_char = 0; - for (; begin_pos < msg_length; ++begin_pos) { - auto c = str[begin_pos]; - if (false == is_delim(c)) { - char_before_var_was_equals_sign = ('=' == previous_char); - break; - } - previous_char = c; - - if (enum_to_underlying_type(VariablePlaceholder::Integer) == c - || enum_to_underlying_type(VariablePlaceholder::Dictionary) == c - || enum_to_underlying_type(VariablePlaceholder::Float) == c) - { - contains_var_placeholder = true; - } - } - if (msg_length == begin_pos) { - // Early exit for performance - return false; - } - - bool contains_decimal_digit = false; - bool contains_alphabet = false; - - // Find next delimiter - end_pos = begin_pos; - for (; end_pos < msg_length; ++end_pos) { - auto c = str[end_pos]; - if (is_decimal_digit(c)) { - contains_decimal_digit = true; - } else if (is_alphabet(c)) { - contains_alphabet = true; - } else if (is_delim(c)) { - break; - } - } - - string_view variable(str.cbegin() + begin_pos, end_pos - begin_pos); - // Treat token as variable if: - // - it contains a decimal digit, or - // - it's directly preceded by '=' and contains an alphabet char, or - // - it could be a multi-digit hex value - if (contains_decimal_digit || (char_before_var_was_equals_sign && contains_alphabet) - || could_be_multi_digit_hex_value(variable)) - { - break; - } - } - - return (msg_length != begin_pos); -} - eight_byte_encoded_variable_t encode_four_byte_float_as_eight_byte( four_byte_encoded_variable_t four_byte_encoded_var ) { diff --git a/components/core/src/ffi/encoding_methods.hpp b/components/core/src/ffi/encoding_methods.hpp index 8463368b0..fa26ef675 100644 --- a/components/core/src/ffi/encoding_methods.hpp +++ b/components/core/src/ffi/encoding_methods.hpp @@ -4,6 +4,7 @@ #include #include +#include "../ir/parsing.hpp" #include "../TraceableException.hpp" // TODO Some of the methods in this file are mostly duplicated from code that @@ -15,12 +16,6 @@ using epoch_time_ms_t = int64_t; using eight_byte_encoded_variable_t = int64_t; using four_byte_encoded_variable_t = int32_t; -enum class VariablePlaceholder : char { - Integer = 0x11, - Dictionary = 0x12, - Float = 0x13, -}; - class EncodingException : public TraceableException { public: // Constructors @@ -55,8 +50,6 @@ static constexpr char cVariableEncodingMethodsVersion[] = "com.yscope.clp.VariableEncodingMethodsV1"; static constexpr char cVariablesSchemaVersion[] = "com.yscope.clp.VariablesSchemaV2"; -constexpr char cVariablePlaceholderEscapeCharacter = '\\'; - static constexpr char cTooFewDictionaryVarsErrorMessage[] = "There are fewer dictionary variables than dictionary variable delimiters in the " "logtype."; @@ -70,58 +63,6 @@ constexpr size_t cMaxDigitsInRepresentableFourByteFloatVar = 8; constexpr uint64_t cEightByteEncodedFloatDigitsBitMask = (1ULL << 54) - 1; constexpr uint32_t cFourByteEncodedFloatDigitsBitMask = (1UL << 25) - 1; -/** - * Checks if the given character is a delimiter - * We treat everything *except* the following quoted characters as a - * delimiter: "+-.0-9A-Z\_a-z" - * @param c - * @return Whether c is a delimiter - */ -bool is_delim(signed char c); - -/** - * @param c - * @return Whether the character is a variable placeholder - */ -bool is_variable_placeholder(char c); - -/** - * @param str - * @return Whether the given string could be a multi-digit hex value - */ -bool could_be_multi_digit_hex_value(std::string_view str); - -/** - * @param value - * @return Whether the given value is a variable according to the schemas - * specified in ffi::get_bounds_of_next_var - */ -bool is_var(std::string_view value); - -/** - * Gets the bounds of the next variable in the given string - * A variable is a token (word between two delimiters) that matches one of - * these schemas: - * - ".*[0-9].*" - * - "=(.*[a-zA-Z].*)" (the variable is within the capturing group) - * - "[a-fA-F0-9]{2,}" - * @param str String to search within - * @param begin_pos Begin position of last variable, changes to begin - * position of next variable - * @param end_pos End position of last variable, changes to end position of - * next variable - * @param contains_var_placeholder Whether the string already contains a - * variable placeholder (for efficiency, this is only set to true, so the - * caller must reset it to false if necessary) - * @return true if a variable was found, false otherwise - */ -bool get_bounds_of_next_var( - std::string_view str, - size_t& begin_pos, - size_t& end_pos, - bool& contains_var_placeholder -); - /** * Encodes the given string into a representable float variable if possible * @tparam encoded_variable_t Type of the encoded variable @@ -321,7 +262,7 @@ std::string decode_message( * @param encoded_vars_length * @return true if a match was found, false otherwise */ -template +template bool wildcard_query_matches_any_encoded_var( std::string_view wildcard_query, std::string_view logtype, diff --git a/components/core/src/ffi/encoding_methods.inc b/components/core/src/ffi/encoding_methods.inc index 13868585b..38c5a00ef 100644 --- a/components/core/src/ffi/encoding_methods.inc +++ b/components/core/src/ffi/encoding_methods.inc @@ -3,9 +3,12 @@ #include +#include "../ir/parsing.hpp" #include "../string_utils.hpp" #include "../type_utils.hpp" +using ir::VariablePlaceholder; + namespace ffi { template bool encode_float_string(std::string_view str, encoded_variable_t& encoded_var) { @@ -364,7 +367,7 @@ bool encode_message_generically( size_t constant_begin_pos = 0; logtype.clear(); logtype.reserve(message.length()); - while (get_bounds_of_next_var( + while (ir::get_bounds_of_next_var( message, var_begin_pos, var_end_pos, @@ -428,8 +431,11 @@ bool encode_message( auto final_constant_handler = [&constant_handler](std::string_view constant, std::string& logtype) { // Ensure final constant doesn't contain a variable placeholder - bool contains_variable_placeholder - = std::any_of(constant.cbegin(), constant.cend(), is_variable_placeholder); + bool contains_variable_placeholder = std::any_of( + constant.cbegin(), + constant.cend(), + ir::is_variable_placeholder + ); return constant_handler(constant, contains_variable_placeholder, logtype); }; auto encoded_variable_handler = [&encoded_vars](encoded_variable_t encoded_variable) { diff --git a/components/core/src/ffi/ir_stream/decoding_methods.inc b/components/core/src/ffi/ir_stream/decoding_methods.inc index c02a933dc..139688388 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.inc +++ b/components/core/src/ffi/ir_stream/decoding_methods.inc @@ -4,6 +4,7 @@ #include #include +#include "../../ir/parsing.hpp" #include "../encoding_methods.hpp" #include "decoding_methods.hpp" #include "protocol_constants.hpp" @@ -97,7 +98,7 @@ void generic_decode_message( break; } - case cVariablePlaceholderEscapeCharacter: { + case ir::cVariablePlaceholderEscapeCharacter: { // Ensure the escape character is followed by a // character that's being escaped if (cur_pos == logtype_length - 1) { diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index 10e9210e9..4e3ea2b2c 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -2,6 +2,7 @@ #include +#include "../../ir/parsing.hpp" #include "byteswap.hpp" #include "protocol_constants.hpp" @@ -182,9 +183,9 @@ static bool append_constant_to_logtype(string_view constant, bool, string& logty auto constant_len = constant.length(); for (size_t i = 0; i < constant_len; ++i) { auto c = constant[i]; - if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { + if (ir::cVariablePlaceholderEscapeCharacter == c || ir::is_variable_placeholder(c)) { logtype.append(constant, begin_pos, i - begin_pos); - logtype += cVariablePlaceholderEscapeCharacter; + logtype += ir::cVariablePlaceholderEscapeCharacter; // NOTE: We don't need to append the character of interest // immediately since the next constant copy operation will get it begin_pos = i; diff --git a/components/core/src/ffi/search/CompositeWildcardToken.cpp b/components/core/src/ffi/search/CompositeWildcardToken.cpp index b57c37da5..199acc8dd 100644 --- a/components/core/src/ffi/search/CompositeWildcardToken.cpp +++ b/components/core/src/ffi/search/CompositeWildcardToken.cpp @@ -1,5 +1,7 @@ #include "CompositeWildcardToken.hpp" +#include "../../ir/parsing.hpp" + using std::string; using std::string_view; using std::variant; @@ -256,7 +258,7 @@ void CompositeWildcardToken::try_add_wildcard_variable( ); } else { string_view var(m_query.cbegin() + begin_pos, end_pos - begin_pos); - if (is_var(var)) { + if (ir::is_var(var)) { m_variables.emplace_back( std::in_place_type>, m_query, diff --git a/components/core/src/ffi/search/query_methods.cpp b/components/core/src/ffi/search/query_methods.cpp index cacd27ab7..e343e78fc 100644 --- a/components/core/src/ffi/search/query_methods.cpp +++ b/components/core/src/ffi/search/query_methods.cpp @@ -1,8 +1,10 @@ #include "query_methods.hpp" +#include "../../ir/parsing.hpp" #include "CompositeWildcardToken.hpp" #include "QueryMethodFailed.hpp" +using ir::is_delim; using std::pair; using std::string; using std::string_view; @@ -188,7 +190,7 @@ void tokenize_query( // - it could be a multi-digit hex value if (contains_decimal_digit || (begin_pos > 0 && '=' == wildcard_query[begin_pos - 1] && contains_alphabet) - || ffi::could_be_multi_digit_hex_value(variable)) + || ir::could_be_multi_digit_hex_value(variable)) { tokens.emplace_back( std::in_place_type>, @@ -295,5 +297,4 @@ template void tokenize_query( CompositeWildcardToken>>& tokens, vector& composite_wildcard_token_indexes ); - } // namespace ffi::search diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp new file mode 100644 index 000000000..507e89a1b --- /dev/null +++ b/components/core/src/ir/parsing.cpp @@ -0,0 +1,117 @@ +#include "parsing.hpp" + +#include + +#include "../string_utils.hpp" +#include "../type_utils.hpp" + +using std::string_view; + +namespace ir { +/* + * For performance, we rely on the ASCII ordering of characters to compare + * ranges of characters at a time instead of comparing individual characters + */ +bool is_delim(signed char c) { + return !( + '+' == c || ('-' <= c && c <= '.') || ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') + || '\\' == c || '_' == c || ('a' <= c && c <= 'z') + ); +} + +bool is_variable_placeholder(char c) { + return (enum_to_underlying_type(VariablePlaceholder::Integer) == c) + || (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c) + || (enum_to_underlying_type(VariablePlaceholder::Float) == c); +} + +bool could_be_multi_digit_hex_value(string_view const str) { + if (str.length() < 2) { + return false; + } + + return std::all_of(str.cbegin(), str.cend(), [](char c) { + return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'); + }); +} + +bool is_var(std::string_view value) { + size_t begin_pos = 0; + size_t end_pos = 0; + bool contains_var_placeholder; + if (get_bounds_of_next_var(value, begin_pos, end_pos, contains_var_placeholder)) { + // Ensure the entire value is a variable + return (0 == begin_pos && value.length() == end_pos); + } else { + return false; + } +} + +bool get_bounds_of_next_var( + string_view const str, + size_t& begin_pos, + size_t& end_pos, + bool& contains_var_placeholder +) { + auto const msg_length = str.length(); + if (end_pos >= msg_length) { + return false; + } + + while (true) { + begin_pos = end_pos; + + // Find next non-delimiter + bool char_before_var_was_equals_sign = false; + char previous_char = 0; + for (; begin_pos < msg_length; ++begin_pos) { + auto c = str[begin_pos]; + if (false == is_delim(c)) { + char_before_var_was_equals_sign = ('=' == previous_char); + break; + } + previous_char = c; + + if (enum_to_underlying_type(VariablePlaceholder::Integer) == c + || enum_to_underlying_type(VariablePlaceholder::Dictionary) == c + || enum_to_underlying_type(VariablePlaceholder::Float) == c) + { + contains_var_placeholder = true; + } + } + if (msg_length == begin_pos) { + // Early exit for performance + return false; + } + + bool contains_decimal_digit = false; + bool contains_alphabet = false; + + // Find next delimiter + end_pos = begin_pos; + for (; end_pos < msg_length; ++end_pos) { + auto c = str[end_pos]; + if (is_decimal_digit(c)) { + contains_decimal_digit = true; + } else if (is_alphabet(c)) { + contains_alphabet = true; + } else if (is_delim(c)) { + break; + } + } + + string_view variable(str.cbegin() + begin_pos, end_pos - begin_pos); + // Treat token as variable if: + // - it contains a decimal digit, or + // - it's directly preceded by '=' and contains an alphabet char, or + // - it could be a multi-digit hex value + if (contains_decimal_digit || (char_before_var_was_equals_sign && contains_alphabet) + || could_be_multi_digit_hex_value(variable)) + { + break; + } + } + + return (msg_length != begin_pos); +} +} // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp new file mode 100644 index 000000000..fa9bdeca0 --- /dev/null +++ b/components/core/src/ir/parsing.hpp @@ -0,0 +1,77 @@ +#ifndef IR_PARSING_HPP +#define IR_PARSING_HPP + +/** + * TODO Technically, the methods in this file are more general than for their + * use in generating CLP's IR. However, introducing a parsing namespace in the + * root source directory would be confusing since we also have the + * compressor_frontend namespace. Once most of compressor_frontend is moved into + * https://github.com/y-scope/log-surgeon, we should reconsider the placement of + * the methods in this file. + */ + +#include + +namespace ir { +enum class VariablePlaceholder : char { + Integer = 0x11, + Dictionary = 0x12, + Float = 0x13, +}; + +constexpr char cVariablePlaceholderEscapeCharacter = '\\'; + +/** + * Checks if the given character is a delimiter + * We treat everything *except* the following quoted characters as a + * delimiter: "+-.0-9A-Z\_a-z" + * @param c + * @return Whether c is a delimiter + */ +bool is_delim(signed char c); + +/** + * @param c + * @return Whether the character is a variable placeholder + */ +bool is_variable_placeholder(char c); + +/** + * @param str + * @return Whether the given string could be a multi-digit hex value + */ +bool could_be_multi_digit_hex_value(std::string_view str); + +/** + * @param value + * @return Whether the given value is a variable according to the schemas + * specified in ffi::get_bounds_of_next_var + */ +bool is_var(std::string_view value); + +/** + * Gets the bounds of the next variable in the given string + * A variable is a token (word between two delimiters) that matches one of + * these schemas: + * - ".*[0-9].*" + * - "=(.*[a-zA-Z].*)" (the variable is within the capturing group) + * - "[a-fA-F0-9]{2,}" + * @param str String to search within + * @param begin_pos Begin position of last variable, changes to begin + * position of next variable + * @param end_pos End position of last variable, changes to end position of + * next variable + * @param contains_var_placeholder Whether the string already contains a + * variable placeholder (for efficiency, this is only set to true, so the + * caller must reset it to false if necessary) + * @return true if a variable was found, false otherwise + */ +bool get_bounds_of_next_var( + std::string_view str, + size_t& begin_pos, + size_t& end_pos, + bool& contains_var_placeholder +); +} // namespace ir + +#endif // IR_PARSING_HPP diff --git a/components/core/tests/test-encoding_methods.cpp b/components/core/tests/test-encoding_methods.cpp index 33b42f29d..815c5890f 100644 --- a/components/core/tests/test-encoding_methods.cpp +++ b/components/core/tests/test-encoding_methods.cpp @@ -12,10 +12,10 @@ using ffi::four_byte_encoded_variable_t; using ffi::encode_float_string; using ffi::encode_integer_string; using ffi::encode_message; -using ffi::get_bounds_of_next_var; -using ffi::VariablePlaceholder; using ffi::wildcard_match_encoded_vars; using ffi::wildcard_query_matches_any_encoded_var; +using ir::get_bounds_of_next_var; +using ir::VariablePlaceholder; using std::string; using std::string_view; using std::vector; @@ -29,123 +29,6 @@ using std::vector; static void string_views_from_strings (const vector& strings, vector& string_views); -TEST_CASE("ffi::get_bounds_of_next_var", "[ffi][get_bounds_of_next_var]") { - string str; - size_t begin_pos; - size_t end_pos; - bool contains_var_placeholder = false; - - // Since the outputs we use to validate depend on the schema/encoding - // methods, we validate the versions - REQUIRE(strcmp("com.yscope.clp.VariableEncodingMethodsV1", - ffi::cVariableEncodingMethodsVersion) == 0); - REQUIRE(strcmp("com.yscope.clp.VariablesSchemaV2", ffi::cVariablesSchemaVersion) == 0); - - // Corner cases - // Empty string - str = ""; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); - - // end_pos past the end of the string - str = "abc"; - begin_pos = 0; - end_pos = string::npos; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); - - // Non-variables - str = "/"; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); - - str = "xyz"; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); - - str = "="; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); - - // Variables - str = "~=x!abc123;1.2%x:+394/-"; - begin_pos = 0; - end_pos = 0; - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("x" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("abc123" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("1.2" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("+394" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); - - str = " ad ff 95 24 0d ff "; - begin_pos = 0; - end_pos = 0; - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("ad" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("95" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("24" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("0d" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); - - // String containing variable placeholder - str = " text "; - str += enum_to_underlying_type(VariablePlaceholder::Integer); - str += " var123 "; - begin_pos = 0; - end_pos = 0; - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE(contains_var_placeholder); - REQUIRE("var123" == str.substr(begin_pos, end_pos - begin_pos)); -} - TEMPLATE_TEST_CASE("Encoding integers", "[ffi][encode-integer]", eight_byte_encoded_variable_t, four_byte_encoded_variable_t) { diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index 3c896e908..c4d586d3e 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -10,6 +10,7 @@ #include "../src/ffi/ir_stream/decoding_methods.hpp" #include "../src/ffi/ir_stream/encoding_methods.hpp" #include "../src/ffi/ir_stream/protocol_constants.hpp" +#include "../src/ir/parsing.hpp" using ffi::decode_float_var; using ffi::decode_integer_var; @@ -20,7 +21,6 @@ using ffi::encode_integer_string; using ffi::encode_message; using ffi::epoch_time_ms_t; using ffi::four_byte_encoded_variable_t; -using ffi::get_bounds_of_next_var; using ffi::ir_stream::cProtocol::EightByteEncodingMagicNumber; using ffi::ir_stream::cProtocol::FourByteEncodingMagicNumber; using ffi::ir_stream::cProtocol::MagicNumberLength; @@ -28,8 +28,8 @@ using ffi::ir_stream::decode_preamble; using ffi::ir_stream::encoded_tag_t; using ffi::ir_stream::get_encoding_type; using ffi::ir_stream::IRErrorCode; -using ffi::VariablePlaceholder; using ffi::wildcard_query_matches_any_encoded_var; +using ir::VariablePlaceholder; using std::chrono::duration_cast; using std::chrono::milliseconds; using std::chrono::system_clock; @@ -443,7 +443,7 @@ TEST_CASE("message_decode_error", "[ffi][decode_next_message]") { // Test if a trailing escape triggers a decoder error auto ir_with_extra_escape{ir_buf}; - ir_with_extra_escape.at(logtype_end_pos - 1) = ffi::cVariablePlaceholderEscapeCharacter; + ir_with_extra_escape.at(logtype_end_pos - 1) = ir::cVariablePlaceholderEscapeCharacter; BufferReader ir_with_extra_escape_buffer{ size_checked_pointer_cast(ir_with_extra_escape.data()), ir_with_extra_escape.size()}; diff --git a/components/core/tests/test-ir_parsing.cpp b/components/core/tests/test-ir_parsing.cpp new file mode 100644 index 000000000..10f348c82 --- /dev/null +++ b/components/core/tests/test-ir_parsing.cpp @@ -0,0 +1,120 @@ +#include + +#include "../src/ir/parsing.hpp" +#include "../src/type_utils.hpp" + +using ir::get_bounds_of_next_var; +using std::string; +using std::string_view; +using std::vector; + +TEST_CASE("ir::get_bounds_of_next_var", "[ir][get_bounds_of_next_var]") { + string str; + size_t begin_pos; + size_t end_pos; + bool contains_var_placeholder = false; + + // Corner cases + // Empty string + str = ""; + begin_pos = 0; + end_pos = 0; + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(false == contains_var_placeholder); + + // end_pos past the end of the string + str = "abc"; + begin_pos = 0; + end_pos = string::npos; + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(false == contains_var_placeholder); + + // Non-variables + str = "/"; + begin_pos = 0; + end_pos = 0; + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(str.length() == begin_pos); + REQUIRE(false == contains_var_placeholder); + + str = "xyz"; + begin_pos = 0; + end_pos = 0; + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(str.length() == begin_pos); + REQUIRE(false == contains_var_placeholder); + + str = "="; + begin_pos = 0; + end_pos = 0; + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(str.length() == begin_pos); + REQUIRE(false == contains_var_placeholder); + + // Variables + str = "~=x!abc123;1.2%x:+394/-"; + begin_pos = 0; + end_pos = 0; + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("x" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("abc123" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("1.2" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("+394" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(false == contains_var_placeholder); + + str = " ad ff 95 24 0d ff "; + begin_pos = 0; + end_pos = 0; + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("ad" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("95" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("24" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("0d" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); + REQUIRE(false == contains_var_placeholder); + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(str.length() == begin_pos); + REQUIRE(false == contains_var_placeholder); + + // String containing variable placeholder + str = " text "; + str += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + str += " var123 "; + begin_pos = 0; + end_pos = 0; + + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(contains_var_placeholder); + REQUIRE("var123" == str.substr(begin_pos, end_pos - begin_pos)); +} diff --git a/components/core/tests/test-query_methods.cpp b/components/core/tests/test-query_methods.cpp index 0333e3150..ea172ef1c 100644 --- a/components/core/tests/test-query_methods.cpp +++ b/components/core/tests/test-query_methods.cpp @@ -10,6 +10,7 @@ #include "../src/ffi/search/query_methods.hpp" #include "../src/ffi/search/QueryMethodFailed.hpp" #include "../src/ffi/search/WildcardToken.hpp" +#include "../src/ir/parsing.hpp" using ffi::eight_byte_encoded_variable_t; using ffi::four_byte_encoded_variable_t; @@ -18,7 +19,7 @@ using ffi::search::generate_subqueries; using ffi::search::Subquery; using ffi::search::TokenType; using ffi::search::WildcardToken; -using ffi::VariablePlaceholder; +using ir::VariablePlaceholder; using std::string; using std::variant; using std::vector; From d0779ba318316e55c7ceabb34718a2909963154c Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 08:00:11 -0400 Subject: [PATCH 02/30] Replace existing parsing methods with those from ir namespace. --- components/core/CMakeLists.txt | 4 + components/core/cmake/utils.cmake | 2 + components/core/src/Grep.cpp | 4 +- .../core/src/LogTypeDictionaryEntry.cpp | 4 +- components/core/src/Utils.cpp | 49 ----------- components/core/src/Utils.hpp | 43 ---------- .../core/src/streaming_archive/Constants.hpp | 2 +- .../tests/test-EncodedVariableInterpreter.cpp | 2 +- components/core/tests/test-Utils.cpp | 82 ------------------- 9 files changed, 14 insertions(+), 178 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 26e7bbb79..1cc7a43c3 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -434,6 +434,8 @@ set(SOURCE_FILES_clg src/Grep.cpp src/Grep.hpp src/ir/LogEvent.hpp + src/ir/parsing.cpp + src/ir/parsing.hpp src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp @@ -590,6 +592,8 @@ set(SOURCE_FILES_clo src/Grep.cpp src/Grep.hpp src/ir/LogEvent.hpp + src/ir/parsing.cpp + src/ir/parsing.hpp src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp diff --git a/components/core/cmake/utils.cmake b/components/core/cmake/utils.cmake index 146029bbf..f2bb940ce 100644 --- a/components/core/cmake/utils.cmake +++ b/components/core/cmake/utils.cmake @@ -9,6 +9,8 @@ set(SOURCE_FILES_make-dictionaries-readable ${CMAKE_CURRENT_SOURCE_DIR}/src/FileReader.hpp ${CMAKE_CURRENT_SOURCE_DIR}/src/FileWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/FileWriter.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/ir/parsing.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/ir/parsing.hpp ${CMAKE_CURRENT_SOURCE_DIR}/src/LogTypeDictionaryEntry.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/LogTypeDictionaryEntry.hpp ${CMAKE_CURRENT_SOURCE_DIR}/src/LogTypeDictionaryReader.cpp diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 9ad133e81..0fdc72061 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -9,6 +9,7 @@ #include "StringReader.hpp" #include "Utils.hpp" +using ir::is_delim; using std::string; using std::vector; using streaming_archive::reader::Archive; @@ -556,7 +557,8 @@ bool Grep::get_bounds_of_next_potential_var (const string& value, size_t& begin_ // - it contains a decimal digit, or // - it could be a multi-digit hex value, or // - it's directly preceded by an equals sign and contains an alphabet without a wildcard between the equals sign and the first alphabet of the token - if (contains_decimal_digit || could_be_multi_digit_hex_value(value, begin_pos, end_pos)) { + auto variable = static_cast(value).substr(begin_pos, end_pos - begin_pos); + if (contains_decimal_digit || ir::could_be_multi_digit_hex_value(variable)) { is_var = true; } else if (begin_pos > 0 && '=' == value[begin_pos - 1] && contains_alphabet) { // Find first alphabet or wildcard in token diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index f84f01e67..8d7557180 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -1,6 +1,7 @@ #include "LogTypeDictionaryEntry.hpp" // Project headers +#include "ir/parsing.hpp" #include "type_utils.hpp" #include "Utils.hpp" @@ -74,7 +75,8 @@ void LogTypeDictionaryEntry::add_float_var () { bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; - if (get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { + bool contains_var_placeholder{}; + if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos, contains_var_placeholder)) { // Append to log type: from end of last variable to start of current variable add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos); diff --git a/components/core/src/Utils.cpp b/components/core/src/Utils.cpp index a41ae8702..a0b226fee 100644 --- a/components/core/src/Utils.cpp +++ b/components/core/src/Utils.cpp @@ -85,55 +85,6 @@ ErrorCode create_directory_structure (const string& path, mode_t mode) { return ErrorCode_Success; } -bool get_bounds_of_next_var (const string& msg, size_t& begin_pos, size_t& end_pos) { - const auto msg_length = msg.length(); - if (end_pos >= msg_length) { - return false; - } - - while (true) { - begin_pos = end_pos; - // Find next non-delimiter - for (; begin_pos < msg_length; ++begin_pos) { - if (false == is_delim(msg[begin_pos])) { - break; - } - } - if (msg_length == begin_pos) { - // Early exit for performance - return false; - } - - bool contains_decimal_digit = false; - bool contains_alphabet = false; - - // Find next delimiter - end_pos = begin_pos; - for (; end_pos < msg_length; ++end_pos) { - char c = msg[end_pos]; - if (is_decimal_digit(c)) { - contains_decimal_digit = true; - } else if (is_alphabet(c)) { - contains_alphabet = true; - } else if (is_delim(c)) { - break; - } - } - - // Treat token as variable if: - // - it contains a decimal digit, or - // - it's directly preceded by an equals sign and contains an alphabet, or - // - it could be a multi-digit hex value - if (contains_decimal_digit || (begin_pos > 0 && '=' == msg[begin_pos - 1] && contains_alphabet) || - could_be_multi_digit_hex_value(msg, begin_pos, end_pos)) - { - break; - } - } - - return (msg_length != begin_pos); -} - string get_parent_directory_path (const string& path) { string dirname = get_unambiguous_path(path); diff --git a/components/core/src/Utils.hpp b/components/core/src/Utils.hpp index 6f8b843f3..e3fa814a0 100644 --- a/components/core/src/Utils.hpp +++ b/components/core/src/Utils.hpp @@ -14,39 +14,6 @@ #include "FileReader.hpp" #include "ParsedMessage.hpp" -/** - * Checks if the given segment of the stringcould be a multi-digit hex value - * @param str - * @param begin_pos - * @param end_pos - * @return true if yes, false otherwise - */ -inline bool could_be_multi_digit_hex_value (const std::string& str, size_t begin_pos, size_t end_pos) { - if (end_pos - begin_pos < 2) { - return false; - } - - for (size_t i = begin_pos; i < end_pos; ++i) { - auto c = str[i]; - if (!( ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9') )) { - return false; - } - } - - return true; -} - -/** - * Checks if character is a delimiter - * We treat everything except the following quoted characters as a delimiter: "+-./0-9A-Z\a-z" - * NOTE: For performance, we rely on the ASCII ordering of characters to compare ranges of characters at a time instead of comparing individual characters - * @param c - * @return true if c is a delimiter, false otherwise - */ -inline bool is_delim (char c) { - return !('+' == c || ('-' <= c && c <= '9') || ('A' <= c && c <= 'Z') || '\\' == c || '_' == c || ('a' <= c && c <= 'z')); -} - /** * Creates a directory with the given path * @param path @@ -82,16 +49,6 @@ ErrorCode create_directory_structure (const std::string& path, mode_t mode); */ std::string get_parent_directory_path (const std::string& path); -/** - * Returns bounds of next variable in given string - * A variable is a token (word between two delimiters) that contains numbers or is directly preceded by an equals sign - * @param msg - * @param begin_pos Begin position of last variable, changes to begin position of next variable - * @param end_pos End position of last variable, changes to end position of next variable - * @return true if a variable was found, false otherwise - */ -bool get_bounds_of_next_var (const std::string& msg, size_t& begin_pos, size_t& end_pos); - /** * Removes ".", "..", and consecutive "/" from a given path and returns the result * @param path The given path diff --git a/components/core/src/streaming_archive/Constants.hpp b/components/core/src/streaming_archive/Constants.hpp index 60334aad2..8d9eb25bf 100644 --- a/components/core/src/streaming_archive/Constants.hpp +++ b/components/core/src/streaming_archive/Constants.hpp @@ -5,7 +5,7 @@ #define STREAMING_ARCHIVE_CONSTANTS_HPP namespace streaming_archive { - constexpr archive_format_version_t cArchiveFormatVersion = cArchiveFormatDevVersionFlag | 7; + constexpr archive_format_version_t cArchiveFormatVersion = cArchiveFormatDevVersionFlag | 8; constexpr char cSegmentsDirname[] = "s"; constexpr char cSegmentListFilename[] = "segment_list.txt"; constexpr char cLogTypeDictFilename[] = "logtype.dict"; diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 04df2e770..9353e949a 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -230,7 +230,7 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { string large_val_str = to_string(cVariableDictionaryIdMax) + "0"; vector var_strs = {"4938", large_val_str, "-25.5196868642755", - "-00.00", "bin/python2.7.3"}; + "-00.00", "python2.7.3"}; msg = "here is a string with a small int " + var_strs[0] + " and a very large int " + var_strs[1] + " and a double " + var_strs[2] + diff --git a/components/core/tests/test-Utils.cpp b/components/core/tests/test-Utils.cpp index 35edb835f..3f309e5f1 100644 --- a/components/core/tests/test-Utils.cpp +++ b/components/core/tests/test-Utils.cpp @@ -82,85 +82,3 @@ TEST_CASE("get_unambiguous_path", "[get_unambiguous_path]") { REQUIRE(get_unambiguous_path("abc///def/../ghi/./") == "abc/ghi"); REQUIRE(get_unambiguous_path("../abc///def/../ghi/./") == "abc/ghi"); } - -TEST_CASE("get_bounds_of_next_var", "[get_bounds_of_next_var]") { - string str; - size_t begin_pos; - size_t end_pos; - - // Corner cases - // Empty string - str = ""; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - - // end_pos past the end of the string - str = "abc"; - begin_pos = 0; - end_pos = string::npos; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - - // Non-variables - str = "/"; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - REQUIRE(str.length() == begin_pos); - - str = "xyz"; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - REQUIRE(str.length() == begin_pos); - - str = "="; - begin_pos = 0; - end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - REQUIRE(str.length() == begin_pos); - - // Variables - str = "~=x!abc123;1.2%x:+394/-"; - begin_pos = 0; - end_pos = 0; - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("x" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("abc123" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("1.2" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("+394/-" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - - str = " ad ff 95 24 0d ff "; - begin_pos = 0; - end_pos = 0; - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("ad" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("95" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("24" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("0d" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); - REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); - REQUIRE(str.length() == begin_pos); -} From e2474fa3ac5a50ad4f1dfc97316c36c37236cbe6 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 08:25:01 -0400 Subject: [PATCH 03/30] Improve performance of parsing methods from ffi to match those in CLP. --- .../core/src/LogTypeDictionaryEntry.cpp | 3 +- components/core/src/ffi/encoding_methods.hpp | 7 --- components/core/src/ffi/encoding_methods.inc | 38 ++++--------- .../src/ffi/ir_stream/encoding_methods.cpp | 26 +-------- components/core/src/ir/parsing.cpp | 39 ++++++------- components/core/src/ir/parsing.hpp | 6 +- components/core/tests/test-ir_parsing.cpp | 55 ++++++------------- 7 files changed, 48 insertions(+), 126 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 8d7557180..164ededf7 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -75,8 +75,7 @@ void LogTypeDictionaryEntry::add_float_var () { bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; - bool contains_var_placeholder{}; - if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos, contains_var_placeholder)) { + if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos); diff --git a/components/core/src/ffi/encoding_methods.hpp b/components/core/src/ffi/encoding_methods.hpp index fa26ef675..6618db346 100644 --- a/components/core/src/ffi/encoding_methods.hpp +++ b/components/core/src/ffi/encoding_methods.hpp @@ -169,10 +169,6 @@ std::string decode_integer_var(encoded_variable_t encoded_var); * components of the message. * @tparam encoded_variable_t Type of the encoded variable * @tparam ConstantHandler Method to handle constants. Signature: - * (std::string_view constant, bool constant_contains_variable_placeholder, - * std::string& logtype) -> bool - * @tparam FinalConstantHandler Method to handle the constant after the last - * variable. Signature: * (std::string_view constant, std::string& logtype) -> bool * @tparam EncodedVariableHandler Method to handle encoded variables. * Signature: (encoded_variable_t) -> void @@ -182,7 +178,6 @@ std::string decode_integer_var(encoded_variable_t encoded_var); * @param message * @param logtype * @param constant_handler - * @param final_constant_handler * @param encoded_variable_handler * @param dictionary_variable_handler * @return true on success, false otherwise @@ -190,14 +185,12 @@ std::string decode_integer_var(encoded_variable_t encoded_var); template < typename encoded_variable_t, typename ConstantHandler, - typename FinalConstantHandler, typename EncodedVariableHandler, typename DictionaryVariableHandler> bool encode_message_generically( std::string_view message, std::string& logtype, ConstantHandler constant_handler, - FinalConstantHandler final_constant_handler, EncodedVariableHandler encoded_variable_handler, DictionaryVariableHandler dictionary_variable_handler ); diff --git a/components/core/src/ffi/encoding_methods.inc b/components/core/src/ffi/encoding_methods.inc index 38c5a00ef..d85b22312 100644 --- a/components/core/src/ffi/encoding_methods.inc +++ b/components/core/src/ffi/encoding_methods.inc @@ -350,32 +350,23 @@ std::string decode_integer_var(encoded_variable_t encoded_var) { template < typename encoded_variable_t, typename ConstantHandler, - typename FinalConstantHandler, typename EncodedVariableHandler, typename DictionaryVariableHandler> bool encode_message_generically( std::string_view message, std::string& logtype, ConstantHandler constant_handler, - FinalConstantHandler final_constant_handler, EncodedVariableHandler encoded_variable_handler, DictionaryVariableHandler dictionary_variable_handler ) { size_t var_begin_pos = 0; size_t var_end_pos = 0; - bool constant_contains_variable_placeholder = false; size_t constant_begin_pos = 0; logtype.clear(); logtype.reserve(message.length()); - while (ir::get_bounds_of_next_var( - message, - var_begin_pos, - var_end_pos, - constant_contains_variable_placeholder - )) - { + while (ir::get_bounds_of_next_var(message, var_begin_pos, var_end_pos)) { std::string_view constant{&message[constant_begin_pos], var_begin_pos - constant_begin_pos}; - if (false == constant_handler(constant, constant_contains_variable_placeholder, logtype)) { + if (false == constant_handler(constant, logtype)) { return false; } constant_begin_pos = var_end_pos; @@ -401,7 +392,7 @@ bool encode_message_generically( std::string_view constant{ &message[constant_begin_pos], message.length() - constant_begin_pos}; - if (false == final_constant_handler(constant, logtype)) { + if (false == constant_handler(constant, logtype)) { return false; } } @@ -416,11 +407,13 @@ bool encode_message( std::vector& encoded_vars, std::vector& dictionary_var_bounds ) { - auto constant_handler = []( // clang-format off - std::string_view constant, - bool contains_variable_placeholder, - std::string& logtype - ) { // clang-format on + auto constant_handler = [](std::string_view constant, std::string& logtype) { + // Ensure constant doesn't contain a variable placeholder + bool contains_variable_placeholder = std::any_of( + constant.cbegin(), + constant.cend(), + ir::is_variable_placeholder + ); if (contains_variable_placeholder) { return false; } @@ -428,16 +421,6 @@ bool encode_message( logtype.append(constant); return true; }; - auto final_constant_handler = - [&constant_handler](std::string_view constant, std::string& logtype) { - // Ensure final constant doesn't contain a variable placeholder - bool contains_variable_placeholder = std::any_of( - constant.cbegin(), - constant.cend(), - ir::is_variable_placeholder - ); - return constant_handler(constant, contains_variable_placeholder, logtype); - }; auto encoded_variable_handler = [&encoded_vars](encoded_variable_t encoded_variable) { encoded_vars.push_back(encoded_variable); }; @@ -457,7 +440,6 @@ bool encode_message( message, logtype, constant_handler, - final_constant_handler, encoded_variable_handler, dictionary_variable_handler )) diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index 4e3ea2b2c..2d637dbe3 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -52,29 +52,16 @@ static void add_base_metadata_fields( ); /** - * Appends a constant to the logtype, escaping variable placeholders when - * @p contains_variable_placeholder is true + * Appends a constant to the logtype, escaping any variable placeholders * @param constant - * @param contains_variable_placeholder Whether the constant contains a variable - * placeholder * @param logtype * @return true */ static bool append_constant_to_logtype( string_view constant, - bool contains_variable_placeholder, string& logtype ); -/** - * Appends the final constant to the logtype, escaping variable placeholders as - * necessary - * @param constant - * @param logtype - * @return Same as append_constant_to_logtype - */ -static bool append_final_constant_to_logtype(string_view constant, string& logtype); - /** * A functor for encoding dictionary variables in a message */ @@ -178,7 +165,7 @@ static void add_base_metadata_fields( metadata[cProtocol::Metadata::TimeZoneIdKey] = time_zone_id; } -static bool append_constant_to_logtype(string_view constant, bool, string& logtype) { +static bool append_constant_to_logtype(string_view constant, string& logtype) { size_t begin_pos = 0; auto constant_len = constant.length(); for (size_t i = 0; i < constant_len; ++i) { @@ -195,13 +182,6 @@ static bool append_constant_to_logtype(string_view constant, bool, string& logty return true; } -static bool append_final_constant_to_logtype(string_view constant, string& logtype) { - // Since we don't know if the last constant contains a variable placeholder, - // we'll simply say it does and append_constant_to_logtype will escape one - // if it finds one - return append_constant_to_logtype(constant, true, logtype); -} - namespace eight_byte_encoding { bool encode_preamble( string_view timestamp_pattern, @@ -242,7 +222,6 @@ namespace eight_byte_encoding { message, logtype, append_constant_to_logtype, - append_final_constant_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) @@ -317,7 +296,6 @@ namespace four_byte_encoding { message, logtype, append_constant_to_logtype, - append_final_constant_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 507e89a1b..c6cc9d1d5 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -1,7 +1,5 @@ #include "parsing.hpp" -#include - #include "../string_utils.hpp" #include "../type_utils.hpp" @@ -25,21 +23,27 @@ bool is_variable_placeholder(char c) { || (enum_to_underlying_type(VariablePlaceholder::Float) == c); } -bool could_be_multi_digit_hex_value(string_view const str) { +// NOTE: `inline` improves performance by 1-2% +inline bool could_be_multi_digit_hex_value(string_view str) { if (str.length() < 2) { return false; } - return std::all_of(str.cbegin(), str.cend(), [](char c) { - return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'); - }); + // NOTE: This is 1-2% faster than using std::all_of with the opposite + // condition + for (auto c : str) { + if (!(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'))) { + return false; + } + } + + return true; } bool is_var(std::string_view value) { size_t begin_pos = 0; size_t end_pos = 0; - bool contains_var_placeholder; - if (get_bounds_of_next_var(value, begin_pos, end_pos, contains_var_placeholder)) { + if (get_bounds_of_next_var(value, begin_pos, end_pos)) { // Ensure the entire value is a variable return (0 == begin_pos && value.length() == end_pos); } else { @@ -50,8 +54,7 @@ bool is_var(std::string_view value) { bool get_bounds_of_next_var( string_view const str, size_t& begin_pos, - size_t& end_pos, - bool& contains_var_placeholder + size_t& end_pos ) { auto const msg_length = str.length(); if (end_pos >= msg_length) { @@ -62,22 +65,11 @@ bool get_bounds_of_next_var( begin_pos = end_pos; // Find next non-delimiter - bool char_before_var_was_equals_sign = false; - char previous_char = 0; for (; begin_pos < msg_length; ++begin_pos) { auto c = str[begin_pos]; if (false == is_delim(c)) { - char_before_var_was_equals_sign = ('=' == previous_char); break; } - previous_char = c; - - if (enum_to_underlying_type(VariablePlaceholder::Integer) == c - || enum_to_underlying_type(VariablePlaceholder::Dictionary) == c - || enum_to_underlying_type(VariablePlaceholder::Float) == c) - { - contains_var_placeholder = true; - } } if (msg_length == begin_pos) { // Early exit for performance @@ -100,12 +92,13 @@ bool get_bounds_of_next_var( } } - string_view variable(str.cbegin() + begin_pos, end_pos - begin_pos); + auto variable = str.substr(begin_pos, end_pos - begin_pos); // Treat token as variable if: // - it contains a decimal digit, or // - it's directly preceded by '=' and contains an alphabet char, or // - it could be a multi-digit hex value - if (contains_decimal_digit || (char_before_var_was_equals_sign && contains_alphabet) + if (contains_decimal_digit + || (begin_pos > 0 && '=' == str[begin_pos - 1] && contains_alphabet) || could_be_multi_digit_hex_value(variable)) { break; diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index fa9bdeca0..99405cc15 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -61,16 +61,12 @@ bool is_var(std::string_view value); * position of next variable * @param end_pos End position of last variable, changes to end position of * next variable - * @param contains_var_placeholder Whether the string already contains a - * variable placeholder (for efficiency, this is only set to true, so the - * caller must reset it to false if necessary) * @return true if a variable was found, false otherwise */ bool get_bounds_of_next_var( std::string_view str, size_t& begin_pos, - size_t& end_pos, - bool& contains_var_placeholder + size_t& end_pos ); } // namespace ir diff --git a/components/core/tests/test-ir_parsing.cpp b/components/core/tests/test-ir_parsing.cpp index 10f348c82..9a8286d22 100644 --- a/components/core/tests/test-ir_parsing.cpp +++ b/components/core/tests/test-ir_parsing.cpp @@ -12,100 +12,82 @@ TEST_CASE("ir::get_bounds_of_next_var", "[ir][get_bounds_of_next_var]") { string str; size_t begin_pos; size_t end_pos; - bool contains_var_placeholder = false; // Corner cases // Empty string str = ""; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); // end_pos past the end of the string str = "abc"; begin_pos = 0; end_pos = string::npos; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); // Non-variables str = "/"; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); str = "xyz"; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); str = "="; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); // Variables str = "~=x!abc123;1.2%x:+394/-"; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("x" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("abc123" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("1.2" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("+394" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); - REQUIRE(false == contains_var_placeholder); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); str = " ad ff 95 24 0d ff "; begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("ad" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("95" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("24" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("0d" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("ff" == str.substr(begin_pos, end_pos - begin_pos)); - REQUIRE(false == contains_var_placeholder); - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == false); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == false); REQUIRE(str.length() == begin_pos); - REQUIRE(false == contains_var_placeholder); // String containing variable placeholder str = " text "; @@ -114,7 +96,6 @@ TEST_CASE("ir::get_bounds_of_next_var", "[ir][get_bounds_of_next_var]") { begin_pos = 0; end_pos = 0; - REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos, contains_var_placeholder) == true); - REQUIRE(contains_var_placeholder); + REQUIRE(get_bounds_of_next_var(str, begin_pos, end_pos) == true); REQUIRE("var123" == str.substr(begin_pos, end_pos - begin_pos)); } From 1c920b41265f72cf2e096658b41abfd177d35dc8 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 08:44:21 -0400 Subject: [PATCH 04/30] Fix build error --- components/core/src/Grep.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 0fdc72061..2ab4f7875 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -6,6 +6,7 @@ // Project headers #include "compressor_frontend/Constants.hpp" #include "EncodedVariableInterpreter.hpp" +#include "ir/parsing.hpp" #include "StringReader.hpp" #include "Utils.hpp" From aa1d327319f0487de18226a6c4a4aa704cedda48 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 08:55:29 -0400 Subject: [PATCH 05/30] C++ 101: Inline methods should be defined in the header. --- components/core/src/ir/parsing.cpp | 17 ----------------- components/core/src/ir/parsing.hpp | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index c6cc9d1d5..6b9ebf08b 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -23,23 +23,6 @@ bool is_variable_placeholder(char c) { || (enum_to_underlying_type(VariablePlaceholder::Float) == c); } -// NOTE: `inline` improves performance by 1-2% -inline bool could_be_multi_digit_hex_value(string_view str) { - if (str.length() < 2) { - return false; - } - - // NOTE: This is 1-2% faster than using std::all_of with the opposite - // condition - for (auto c : str) { - if (!(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'))) { - return false; - } - } - - return true; -} - bool is_var(std::string_view value) { size_t begin_pos = 0; size_t end_pos = 0; diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 99405cc15..a5d447728 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -37,10 +37,25 @@ bool is_delim(signed char c); bool is_variable_placeholder(char c); /** + * NOTE: This method is marked inline for a 1-2% performance improvement * @param str * @return Whether the given string could be a multi-digit hex value */ -bool could_be_multi_digit_hex_value(std::string_view str); +inline bool could_be_multi_digit_hex_value(std::string_view str) { + if (str.length() < 2) { + return false; + } + + // NOTE: This is 1-2% faster than using std::all_of with the opposite + // condition + for (auto c : str) { + if (!(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'))) { + return false; + } + } + + return true; +} /** * @param value From 7fd4656dcf392f815768b47919773140910520a8 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 09:52:12 -0400 Subject: [PATCH 06/30] Replace LogTypeDictionaryEntry::VarDelim with ir::VariablePlaceholder; Deduplicate logtype constant escaping method; Replace all instances of variable delimiter with variable placeholder. --- .../core/src/EncodedVariableInterpreter.cpp | 17 +++--- .../core/src/LogTypeDictionaryEntry.cpp | 61 ++++++------------- .../core/src/LogTypeDictionaryEntry.hpp | 35 +++++------ components/core/src/ffi/encoding_methods.hpp | 4 +- .../src/ffi/ir_stream/encoding_methods.cpp | 14 +---- components/core/src/ir/parsing.cpp | 22 +++++-- components/core/src/ir/parsing.hpp | 13 ++-- .../make_dictionaries_readable/README.md | 6 +- .../make-dictionaries-readable.cpp | 17 +++--- .../tests/test-EncodedVariableInterpreter.cpp | 4 +- 10 files changed, 84 insertions(+), 109 deletions(-) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index 246897ea7..df241b3a4 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -8,6 +8,7 @@ #include "Defs.h" #include "ffi/encoding_methods.hpp" #include "ffi/ir_stream/decoding_methods.hpp" +#include "ir/parsing.hpp" #include "spdlog_with_specializations.hpp" #include "string_utils.hpp" #include "type_utils.hpp" @@ -296,25 +297,25 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic return false; } - LogTypeDictionaryEntry::VarDelim var_delim; + ir::VariablePlaceholder var_placeholder; size_t constant_begin_pos = 0; string float_str; variable_dictionary_id_t var_dict_id; for (size_t i = 0; i < num_vars_in_logtype; ++i) { - size_t var_position = logtype_dict_entry.get_var_info(i, var_delim); + size_t var_position = logtype_dict_entry.get_var_info(i, var_placeholder); // Add the constant that's between the last variable and this one decompressed_msg.append(logtype_value, constant_begin_pos, var_position - constant_begin_pos); - switch (var_delim) { - case LogTypeDictionaryEntry::VarDelim::Integer: + switch (var_placeholder) { + case ir::VariablePlaceholder::Integer: decompressed_msg += std::to_string(encoded_vars[i]); break; - case LogTypeDictionaryEntry::VarDelim::Float: + case ir::VariablePlaceholder::Float: convert_encoded_float_to_string(encoded_vars[i], float_str); decompressed_msg += float_str; break; - case LogTypeDictionaryEntry::VarDelim::Dictionary: + case ir::VariablePlaceholder::Dictionary: var_dict_id = decode_var_dict_id(encoded_vars[i]); decompressed_msg += var_dict.get_value(var_dict_id); break; @@ -323,10 +324,10 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic "EncodedVariableInterpreter: Logtype '{}' contains " "unexpected variable placeholder 0x{:x}", logtype_value, - enum_to_underlying_type(var_delim)); + enum_to_underlying_type(var_placeholder)); return false; } - // Move past the variable delimiter + // Move past the variable placeholder constant_begin_pos = var_position + 1; } // Append remainder of logtype, if any diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 164ededf7..e8c89949a 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -7,43 +7,16 @@ using std::string; -// Constants -static constexpr char cEscapeChar = '\\'; - -// Local function prototypes -/** - * Escapes any variable delimiters in the identified portion of the given value - * @param value - * @param begin_ix - * @param end_ix - * @param escaped_value - */ -static void escape_variable_delimiters (const std::string& value, size_t begin_ix, size_t end_ix, std::string& escaped_value); - -static void escape_variable_delimiters (const string& value, size_t begin_ix, size_t end_ix, string& escaped_value) { - for (size_t i = begin_ix; i < end_ix; ++i) { - auto c = value[i]; - - // Add escape character if necessary - if (enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Integer) == c || - enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Float) == c || - enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Dictionary) == c || - cEscapeChar == c) { - escaped_value += cEscapeChar; - } - - // Add character - escaped_value += value[i]; - } -} - -size_t LogTypeDictionaryEntry::get_var_info (size_t var_ix, VarDelim& var_delim) const { +size_t LogTypeDictionaryEntry::get_var_info( + size_t var_ix, + ir::VariablePlaceholder& var_placeholder +) const { if (var_ix >= m_var_positions.size()) { return SIZE_MAX; } auto var_position = m_var_positions[var_ix]; - var_delim = (VarDelim)m_value[var_position]; + var_placeholder = static_cast(m_value[var_position]); return m_var_positions[var_ix]; } @@ -134,18 +107,18 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec if (is_escaped) { constant += c; is_escaped = false; - } else if (cEscapeChar == c) { + } else if (ir::cVariablePlaceholderEscapeCharacter == c) { is_escaped = true; } else { - if (enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Integer) == c) { + if (enum_to_underlying_type(ir::VariablePlaceholder::Integer) == c) { add_constant(constant, 0, constant.length()); constant.clear(); add_int_var(); - } else if (enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Float) == c) { + } else if (enum_to_underlying_type(ir::VariablePlaceholder::Float) == c) { add_constant(constant, 0, constant.length()); constant.clear(); add_float_var(); - } else if (enum_to_underlying_type(LogTypeDictionaryEntry::VarDelim::Dictionary) == c) + } else if (enum_to_underlying_type(ir::VariablePlaceholder::Dictionary) == c) { add_constant(constant, 0, constant.length()); constant.clear(); @@ -171,21 +144,25 @@ void LogTypeDictionaryEntry::read_from_file (streaming_compression::Decompressor } void LogTypeDictionaryEntry::get_value_with_unfounded_variables_escaped (string& escaped_logtype_value) const { + auto value_view = static_cast(m_value); size_t begin_ix = 0; // Reset escaped value and reserve enough space to at least contain the whole value escaped_logtype_value.clear(); - escaped_logtype_value.reserve(m_value.length()); + escaped_logtype_value.reserve(value_view.length()); for (auto var_position : m_var_positions) { size_t end_ix = var_position; - escape_variable_delimiters(m_value, begin_ix, end_ix, escaped_logtype_value); + ir::escape_and_append_constant_to_logtype( + value_view.substr(begin_ix, end_ix - begin_ix), + escaped_logtype_value + ); - // Add variable delimiter - escaped_logtype_value += m_value[end_ix]; + // Add variable placeholder + escaped_logtype_value += value_view[end_ix]; // Move begin to start of next portion of logtype between variables begin_ix = end_ix + 1; } - // Escape any variable delimiters in remainder of value - escape_variable_delimiters(m_value, begin_ix, m_value.length(), escaped_logtype_value); + // Escape any variable placeholders in remainder of value + ir::escape_and_append_constant_to_logtype(value_view.substr(begin_ix), escaped_logtype_value); } diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index 85a2aabde..aade2fbad 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -9,6 +9,7 @@ #include "DictionaryEntry.hpp" #include "ErrorCode.hpp" #include "FileReader.hpp" +#include "ir/parsing.hpp" #include "streaming_compression/zstd/Compressor.hpp" #include "streaming_compression/zstd/Decompressor.hpp" #include "TraceableException.hpp" @@ -31,15 +32,6 @@ class LogTypeDictionaryEntry : public DictionaryEntry { } }; - // Constants - enum class VarDelim : char { - // NOTE: These values are used within logtypes to denote variables, so care must be taken when changing them - Integer = 0x11, - Dictionary = 0x12, - Float = 0x13, - Length = 3 - }; - // Constructors LogTypeDictionaryEntry () = default; // Use default copy constructor @@ -51,35 +43,35 @@ class LogTypeDictionaryEntry : public DictionaryEntry { // Methods /** - * Adds a dictionary variable delimiter to the given logtype + * Adds a dictionary variable placeholder to the given logtype * @param logtype */ static void add_dict_var (std::string& logtype) { - logtype += enum_to_underlying_type(VarDelim::Dictionary); + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); } /** - * Adds an integer variable delimiter to the given logtype + * Adds an integer variable placeholder to the given logtype * @param logtype */ static void add_int_var (std::string& logtype) { - logtype += enum_to_underlying_type(VarDelim::Integer); + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); } /** - * Adds a float variable delimiter to the given logtype + * Adds a float variable placeholder to the given logtype * @param logtype */ static void add_float_var (std::string& logtype) { - logtype += enum_to_underlying_type(VarDelim::Float); + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); } size_t get_num_vars () const { return m_var_positions.size(); } /** * Gets all info about a variable in the logtype * @param var_ix The index of the variable to get the info for - * @param var_delim + * @param var_placeholder * @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds */ - size_t get_var_info (size_t var_ix, VarDelim& var_delim) const; + size_t get_var_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const; /** * Gets the size (in-memory) of the data contained in this entry @@ -95,15 +87,15 @@ class LogTypeDictionaryEntry : public DictionaryEntry { */ void add_constant (const std::string& value_containing_constant, size_t begin_pos, size_t length); /** - * Adds an int variable delimiter + * Adds an int variable placeholder */ void add_int_var (); /** - * Adds a float variable delimiter + * Adds a float variable placeholder */ void add_float_var (); /** - * Adds a dictionary variable delimiter + * Adds a dictionary variable placeholder */ void add_dictionary_var (); @@ -147,7 +139,8 @@ class LogTypeDictionaryEntry : public DictionaryEntry { private: // Methods /** - * Escapes any variable delimiters that don't correspond to the positions of variables in the logtype entry's value + * Escapes any variable placeholders that don't correspond to the positions + * of variables in the logtype entry's value * @param escaped_logtype_value */ void get_value_with_unfounded_variables_escaped (std::string& escaped_logtype_value) const; diff --git a/components/core/src/ffi/encoding_methods.hpp b/components/core/src/ffi/encoding_methods.hpp index 6618db346..ec4a8ec37 100644 --- a/components/core/src/ffi/encoding_methods.hpp +++ b/components/core/src/ffi/encoding_methods.hpp @@ -51,10 +51,10 @@ static constexpr char cVariableEncodingMethodsVersion[] static constexpr char cVariablesSchemaVersion[] = "com.yscope.clp.VariablesSchemaV2"; static constexpr char cTooFewDictionaryVarsErrorMessage[] - = "There are fewer dictionary variables than dictionary variable delimiters in the " + = "There are fewer dictionary variables than dictionary variable placeholders in the " "logtype."; static constexpr char cTooFewEncodedVarsErrorMessage[] - = "There are fewer encoded variables than encoded variable delimiters in the logtype."; + = "There are fewer encoded variables than encoded variable placeholders in the logtype."; static constexpr char cUnexpectedEscapeCharacterMessage[] = "Unexpected escape character without escaped value at the end of the logtype."; diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index 2d637dbe3..ec427ab76 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -166,19 +166,7 @@ static void add_base_metadata_fields( } static bool append_constant_to_logtype(string_view constant, string& logtype) { - size_t begin_pos = 0; - auto constant_len = constant.length(); - for (size_t i = 0; i < constant_len; ++i) { - auto c = constant[i]; - if (ir::cVariablePlaceholderEscapeCharacter == c || ir::is_variable_placeholder(c)) { - logtype.append(constant, begin_pos, i - begin_pos); - logtype += ir::cVariablePlaceholderEscapeCharacter; - // NOTE: We don't need to append the character of interest - // immediately since the next constant copy operation will get it - begin_pos = i; - } - } - logtype.append(constant, begin_pos, constant_len - begin_pos); + ir::escape_and_append_constant_to_logtype(constant, logtype); return true; } diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 6b9ebf08b..1fa59be34 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -34,11 +34,7 @@ bool is_var(std::string_view value) { } } -bool get_bounds_of_next_var( - string_view const str, - size_t& begin_pos, - size_t& end_pos -) { +bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& end_pos) { auto const msg_length = str.length(); if (end_pos >= msg_length) { return false; @@ -90,4 +86,20 @@ bool get_bounds_of_next_var( return (msg_length != begin_pos); } + +void escape_and_append_constant_to_logtype(string_view constant, std::string& logtype) { + size_t begin_pos = 0; + auto constant_len = constant.length(); + for (size_t i = 0; i < constant_len; ++i) { + auto c = constant[i]; + if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { + logtype.append(constant, begin_pos, i - begin_pos); + logtype += ir::cVariablePlaceholderEscapeCharacter; + // NOTE: We don't need to append the character of interest + // immediately since the next constant copy operation will get it + begin_pos = i; + } + } + logtype.append(constant, begin_pos, constant_len - begin_pos); +} } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index a5d447728..5ee93f340 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -78,11 +78,14 @@ bool is_var(std::string_view value); * next variable * @return true if a variable was found, false otherwise */ -bool get_bounds_of_next_var( - std::string_view str, - size_t& begin_pos, - size_t& end_pos -); +bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end_pos); + +/** + * Appends the given constant to the logtype, escaping any variable placeholders + * @param constant + * @param logtype + */ +void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); } // namespace ir #endif // IR_PARSING_HPP diff --git a/components/core/src/utils/make_dictionaries_readable/README.md b/components/core/src/utils/make_dictionaries_readable/README.md index 4a232a285..c3d574ef6 100644 --- a/components/core/src/utils/make_dictionaries_readable/README.md +++ b/components/core/src/utils/make_dictionaries_readable/README.md @@ -4,6 +4,6 @@ For a dictionary, `make-dictionaries-readable` prints one entry per line. For log type dictionary entries, this requires making some characters printable: * Newlines are replaced with `\n` -* Dictionary variable delimiters are replaced with `\d` -* Non-dictionary integer variable delimiters are replaced with `\i` -* Non-dictionary float variable delimiters are replaced with `\f` +* Dictionary variable placeholders are replaced with `\d` +* Non-dictionary integer variable placeholders are replaced with `\i` +* Non-dictionary float variable placeholders are replaced with `\f` diff --git a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp index a84e8e08a..d1785038f 100644 --- a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -10,6 +10,7 @@ // Project headers #include "../../FileWriter.hpp" +#include "../../ir/parsing.hpp" #include "../../LogTypeDictionaryReader.hpp" #include "../../spdlog_with_specializations.hpp" #include "../../streaming_archive/Constants.hpp" @@ -66,28 +67,28 @@ int main (int argc, const char* argv[]) { size_t constant_begin_pos = 0; for (size_t var_ix = 0; var_ix < entry.get_num_vars(); ++var_ix) { - LogTypeDictionaryEntry::VarDelim var_delim; - size_t var_pos = entry.get_var_info(var_ix, var_delim); + ir::VariablePlaceholder var_placeholder; + size_t var_pos = entry.get_var_info(var_ix, var_placeholder); // Add the constant that's between the last variable and this one, with newlines escaped human_readable_value.append(value, constant_begin_pos, var_pos - constant_begin_pos); - switch (var_delim) { - case LogTypeDictionaryEntry::VarDelim::Integer: + switch (var_placeholder) { + case ir::VariablePlaceholder::Integer: human_readable_value += "\\i"; break; - case LogTypeDictionaryEntry::VarDelim::Float: + case ir::VariablePlaceholder::Float: human_readable_value += "\\f"; break; - case LogTypeDictionaryEntry::VarDelim::Dictionary: + case ir::VariablePlaceholder::Dictionary: human_readable_value += "\\d"; break; default: SPDLOG_ERROR("Logtype '{}' contains unexpected variable placeholder 0x{:x}", - value, enum_to_underlying_type(var_delim)); + value, enum_to_underlying_type(var_placeholder)); return -1; } - // Move past the variable delimiter + // Move past the variable placeholder constant_begin_pos = var_pos + 1; } // Append remainder of value, if any diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 9353e949a..c95971204 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -245,10 +245,10 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { // Test var_ids is correctly populated size_t encoded_var_id_ix = 0; - LogTypeDictionaryEntry::VarDelim var_placeholder; + ir::VariablePlaceholder var_placeholder; for (auto var_ix = 0; var_ix < logtype_dict_entry.get_num_vars(); var_ix++) { std::ignore = logtype_dict_entry.get_var_info(var_ix, var_placeholder); - if (LogTypeDictionaryEntry::VarDelim::Dictionary == var_placeholder) { + if (ir::VariablePlaceholder::Dictionary == var_placeholder) { auto var = encoded_vars[var_ix]; REQUIRE(var_ids.size() > encoded_var_id_ix); REQUIRE(EncodedVariableInterpreter::decode_var_dict_id(var) == From 22dc8206063034a87c63a927644323f2f3e49ea1 Mon Sep 17 00:00:00 2001 From: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 17:14:58 -0400 Subject: [PATCH 07/30] Apply suggestions from code review Co-authored-by: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> --- components/core/src/LogTypeDictionaryEntry.cpp | 3 +-- components/core/src/ir/parsing.cpp | 4 ++-- components/core/src/ir/parsing.hpp | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index e8c89949a..1d6ec4a7c 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -118,8 +118,7 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec add_constant(constant, 0, constant.length()); constant.clear(); add_float_var(); - } else if (enum_to_underlying_type(ir::VariablePlaceholder::Dictionary) == c) - { + } else if (enum_to_underlying_type(ir::VariablePlaceholder::Dictionary) == c) { add_constant(constant, 0, constant.length()); constant.clear(); add_dictionary_var(); diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 1fa59be34..27678a418 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -36,7 +36,7 @@ bool is_var(std::string_view value) { bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& end_pos) { auto const msg_length = str.length(); - if (end_pos >= msg_length) { + if (msg_length <= end_pos) { return false; } @@ -77,7 +77,7 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en // - it's directly preceded by '=' and contains an alphabet char, or // - it could be a multi-digit hex value if (contains_decimal_digit - || (begin_pos > 0 && '=' == str[begin_pos - 1] && contains_alphabet) + || (0 < begin_pos && '=' == str[begin_pos - 1] && contains_alphabet) || could_be_multi_digit_hex_value(variable)) { break; diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 5ee93f340..d4a738366 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -49,7 +49,7 @@ inline bool could_be_multi_digit_hex_value(std::string_view str) { // NOTE: This is 1-2% faster than using std::all_of with the opposite // condition for (auto c : str) { - if (!(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'))) { + if (false == (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9'))) { return false; } } From 10e339a55748d313c72c2fba99b2ec5668d63113 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Sun, 17 Sep 2023 17:15:43 -0400 Subject: [PATCH 08/30] Add suggestion from code review with formatting. --- components/core/src/ir/parsing.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 27678a418..940252671 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -11,10 +11,9 @@ namespace ir { * ranges of characters at a time instead of comparing individual characters */ bool is_delim(signed char c) { - return !( - '+' == c || ('-' <= c && c <= '.') || ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') - || '\\' == c || '_' == c || ('a' <= c && c <= 'z') - ); + return false + == ('+' == c || ('-' <= c && c <= '.') || ('0' <= c && c <= '9') + || ('A' <= c && c <= 'Z') || '\\' == c || '_' == c || ('a' <= c && c <= 'z')); } bool is_variable_placeholder(char c) { From 22bfa84c7271a7bae4d8bb3f04d1bbae6563fc9b Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Sun, 17 Sep 2023 20:43:03 -0400 Subject: [PATCH 09/30] Fix dictionary collision for compression/decompression --- components/core/CMakeLists.txt | 2 ++ .../core/src/EncodedVariableInterpreter.cpp | 2 ++ .../core/src/LogTypeDictionaryEntry.cpp | 20 +++++++++++++------ .../core/src/LogTypeDictionaryEntry.hpp | 11 ++++++++++ components/core/src/ir/parsing.hpp | 1 + 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 1cc7a43c3..7dbe74bf5 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -7,6 +7,8 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE) endif() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # Set general compressor set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor") set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index df241b3a4..62fa78e34 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -319,6 +319,8 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic var_dict_id = decode_var_dict_id(encoded_vars[i]); decompressed_msg += var_dict.get_value(var_dict_id); break; + case ir::VariablePlaceholder::Escape: + break; default: SPDLOG_ERROR( "EncodedVariableInterpreter: Logtype '{}' contains " diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 1d6ec4a7c..9166892eb 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -46,18 +46,25 @@ void LogTypeDictionaryEntry::add_float_var () { add_float_var(m_value); } +void LogTypeDictionaryEntry::add_escape_var() { + m_var_positions.push_back(m_value.length()); + add_escape_var(m_value); +} + bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos); + auto constant = std::string_view(msg).substr(last_var_end_pos, var_begin_pos - last_var_end_pos); + ir::escape_and_append_constant_to_logtype(constant, m_value); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - add_constant(msg, last_var_end_pos, msg.length() - last_var_end_pos); + auto constant = std::string_view(msg).substr(last_var_end_pos, msg.length() - last_var_end_pos); + ir::escape_and_append_constant_to_logtype(constant, m_value); } return false; @@ -71,10 +78,8 @@ void LogTypeDictionaryEntry::clear () { void LogTypeDictionaryEntry::write_to_file (streaming_compression::Compressor& compressor) const { compressor.write_numeric_value(m_id); - string escaped_value; - get_value_with_unfounded_variables_escaped(escaped_value); - compressor.write_numeric_value(escaped_value.length()); - compressor.write_string(escaped_value); + compressor.write_numeric_value(m_value.length()); + compressor.write_string(m_value); } ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Decompressor& decompressor) { @@ -109,6 +114,9 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec is_escaped = false; } else if (ir::cVariablePlaceholderEscapeCharacter == c) { is_escaped = true; + add_constant(constant, 0, constant.length()); + constant.clear(); + add_escape_var(); } else { if (enum_to_underlying_type(ir::VariablePlaceholder::Integer) == c) { add_constant(constant, 0, constant.length()); diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index aade2fbad..35bef1e60 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -63,6 +63,13 @@ class LogTypeDictionaryEntry : public DictionaryEntry { static void add_float_var (std::string& logtype) { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); } + /** + * Adds an escape variable placeholder to the given logtype + * @param logtype + */ + static void add_escape_var (std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + } size_t get_num_vars () const { return m_var_positions.size(); } /** @@ -98,6 +105,10 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * Adds a dictionary variable placeholder */ void add_dictionary_var (); + /** + * Adds an escape variable placeholder + */ + void add_escape_var (); /** * Parses next variable from a message, constructing the constant part of the message's logtype as well diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index d4a738366..2aa358623 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -17,6 +17,7 @@ enum class VariablePlaceholder : char { Integer = 0x11, Dictionary = 0x12, Float = 0x13, + Escape = 0x5c, }; constexpr char cVariablePlaceholderEscapeCharacter = '\\'; From 7d6d7494291471b5b454b32a964979b14505012a Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 18 Sep 2023 10:26:25 -0400 Subject: [PATCH 10/30] Fix the generated log type --- components/core/src/Grep.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 2ab4f7875..3b2c22909 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -370,6 +370,9 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv logtype.append(processed_search_string, last_token_end_pos, string::npos); last_token_end_pos = processed_search_string.length(); } + std::string const uncleaned_logtype = logtype; + logtype.clear(); + ir::escape_and_append_constant_to_logtype(uncleaned_logtype, logtype); if ("*" == logtype) { // Logtype will match all messages From 8ebf5dedb39af7fd7b9b85066586e1f4c8f518ae Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:41:48 -0400 Subject: [PATCH 11/30] Fix var tracking --- .../core/src/EncodedVariableInterpreter.cpp | 21 +++++---- .../core/src/LogTypeDictionaryEntry.cpp | 46 +++++-------------- .../core/src/LogTypeDictionaryEntry.hpp | 31 +++++++------ .../src/streaming_archive/reader/File.cpp | 6 +-- .../make-dictionaries-readable.cpp | 10 ++-- .../tests/test-EncodedVariableInterpreter.cpp | 6 +-- 6 files changed, 53 insertions(+), 67 deletions(-) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index 62fa78e34..40326d994 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -287,13 +287,14 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDictionaryEntry& logtype_dict_entry, const VariableDictionaryReader& var_dict, const vector& encoded_vars, string& decompressed_msg) { - size_t num_vars_in_logtype = logtype_dict_entry.get_num_vars(); + size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); + size_t const num_vars = logtype_dict_entry.get_num_variables(); // Ensure the number of variables in the logtype matches the number of encoded variables given const auto& logtype_value = logtype_dict_entry.get_value(); - if (num_vars_in_logtype != encoded_vars.size()) { + if (num_vars != encoded_vars.size()) { SPDLOG_ERROR("EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given for decoding.", logtype_value.c_str(), - num_vars_in_logtype, encoded_vars.size()); + num_vars, encoded_vars.size()); return false; } @@ -301,22 +302,22 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic size_t constant_begin_pos = 0; string float_str; variable_dictionary_id_t var_dict_id; - for (size_t i = 0; i < num_vars_in_logtype; ++i) { - size_t var_position = logtype_dict_entry.get_var_info(i, var_placeholder); + for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; ++placeholder_ix) { + size_t placeholder_position = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); // Add the constant that's between the last variable and this one decompressed_msg.append(logtype_value, constant_begin_pos, - var_position - constant_begin_pos); + placeholder_position - constant_begin_pos); switch (var_placeholder) { case ir::VariablePlaceholder::Integer: - decompressed_msg += std::to_string(encoded_vars[i]); + decompressed_msg += std::to_string(encoded_vars[var_ix++]); break; case ir::VariablePlaceholder::Float: - convert_encoded_float_to_string(encoded_vars[i], float_str); + convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); decompressed_msg += float_str; break; case ir::VariablePlaceholder::Dictionary: - var_dict_id = decode_var_dict_id(encoded_vars[i]); + var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); decompressed_msg += var_dict.get_value(var_dict_id); break; case ir::VariablePlaceholder::Escape: @@ -330,7 +331,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic return false; } // Move past the variable placeholder - constant_begin_pos = var_position + 1; + constant_begin_pos = placeholder_position + 1; } // Append remainder of logtype, if any if (constant_begin_pos < logtype_value.length()) { diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 9166892eb..9c3b31314 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -7,23 +7,23 @@ using std::string; -size_t LogTypeDictionaryEntry::get_var_info( +size_t LogTypeDictionaryEntry::get_placeholder_info( size_t var_ix, ir::VariablePlaceholder& var_placeholder ) const { - if (var_ix >= m_var_positions.size()) { + if (var_ix >= m_placeholder_positions.size()) { return SIZE_MAX; } - auto var_position = m_var_positions[var_ix]; + auto var_position = m_placeholder_positions[var_ix]; var_placeholder = static_cast(m_value[var_position]); - return m_var_positions[var_ix]; + return m_placeholder_positions[var_ix]; } size_t LogTypeDictionaryEntry::get_data_size () const { // NOTE: sizeof(vector[0]) is executed at compile time so there's no risk of an exception at runtime - return sizeof(m_id) + m_value.length() + m_var_positions.size() * sizeof(m_var_positions[0]) + + return sizeof(m_id) + m_value.length() + m_placeholder_positions.size() * sizeof(m_placeholder_positions[0]) + m_ids_of_segments_containing_entry.size() * sizeof(segment_id_t); } @@ -32,23 +32,24 @@ void LogTypeDictionaryEntry::add_constant (const string& value_containing_consta } void LogTypeDictionaryEntry::add_dictionary_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_dict_var(m_value); } void LogTypeDictionaryEntry::add_int_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_int_var(m_value); } void LogTypeDictionaryEntry::add_float_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_float_var(m_value); } void LogTypeDictionaryEntry::add_escape_var() { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_escape_var(m_value); + ++m_num_escape_placeholders; } bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { @@ -72,7 +73,8 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi void LogTypeDictionaryEntry::clear () { m_value.clear(); - m_var_positions.clear(); + m_placeholder_positions.clear(); + m_num_escape_placeholders = 0; } void LogTypeDictionaryEntry::write_to_file (streaming_compression::Compressor& compressor) const { @@ -149,27 +151,3 @@ void LogTypeDictionaryEntry::read_from_file (streaming_compression::Decompressor throw OperationFailed(error_code, __FILENAME__, __LINE__); } } - -void LogTypeDictionaryEntry::get_value_with_unfounded_variables_escaped (string& escaped_logtype_value) const { - auto value_view = static_cast(m_value); - size_t begin_ix = 0; - // Reset escaped value and reserve enough space to at least contain the whole value - escaped_logtype_value.clear(); - escaped_logtype_value.reserve(value_view.length()); - for (auto var_position : m_var_positions) { - size_t end_ix = var_position; - - ir::escape_and_append_constant_to_logtype( - value_view.substr(begin_ix, end_ix - begin_ix), - escaped_logtype_value - ); - - // Add variable placeholder - escaped_logtype_value += value_view[end_ix]; - - // Move begin to start of next portion of logtype between variables - begin_ix = end_ix + 1; - } - // Escape any variable placeholders in remainder of value - ir::escape_and_append_constant_to_logtype(value_view.substr(begin_ix), escaped_logtype_value); -} diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index 35bef1e60..25168f9d8 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -33,7 +33,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { }; // Constructors - LogTypeDictionaryEntry () = default; + LogTypeDictionaryEntry () : m_num_escape_placeholders{0} {}; // Use default copy constructor LogTypeDictionaryEntry (const LogTypeDictionaryEntry&) = default; @@ -71,14 +71,26 @@ class LogTypeDictionaryEntry : public DictionaryEntry { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); } - size_t get_num_vars () const { return m_var_positions.size(); } /** - * Gets all info about a variable in the logtype + * @return The total number of placeholders on record. The placeholders may + * represent an escape characters. + */ + size_t get_num_placeholders () const { return m_placeholder_positions.size(); } + + /** + * @return The total number of variables on record. + */ + size_t get_num_variables () const { + return m_placeholder_positions.size() - m_num_escape_placeholders; + } + + /** + * Gets all info about a placeholder in the logtype * @param var_ix The index of the variable to get the info for * @param var_placeholder * @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds */ - size_t get_var_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const; + size_t get_placeholder_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const; /** * Gets the size (in-memory) of the data contained in this entry @@ -148,16 +160,9 @@ class LogTypeDictionaryEntry : public DictionaryEntry { void read_from_file (streaming_compression::Decompressor& decompressor); private: - // Methods - /** - * Escapes any variable placeholders that don't correspond to the positions - * of variables in the logtype entry's value - * @param escaped_logtype_value - */ - void get_value_with_unfounded_variables_escaped (std::string& escaped_logtype_value) const; - // Variables - std::vector m_var_positions; + std::vector m_placeholder_positions; + size_t m_num_escape_placeholders; }; #endif // LOGTYPEDICTIONARYENTRY_HPP diff --git a/components/core/src/streaming_archive/reader/File.cpp b/components/core/src/streaming_archive/reader/File.cpp index 346585fbc..afd0f048c 100644 --- a/components/core/src/streaming_archive/reader/File.cpp +++ b/components/core/src/streaming_archive/reader/File.cpp @@ -212,7 +212,7 @@ namespace streaming_archive::reader { // Get number of variables in logtype const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); auto timestamp = m_timestamps[m_msgs_ix]; if (search_begin_timestamp <= timestamp && timestamp <= search_end_timestamp) { @@ -253,7 +253,7 @@ namespace streaming_archive::reader { // Get number of variables in logtype const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); for (auto sub_query : query.get_relevant_sub_queries()) { // Check if logtype matches search @@ -315,7 +315,7 @@ namespace streaming_archive::reader { // Get variables msg.clear_vars(); const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); if (m_variables_ix + num_vars > m_num_variables) { return false; } diff --git a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp index d1785038f..4d992de32 100644 --- a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -66,12 +66,12 @@ int main (int argc, const char* argv[]) { human_readable_value.clear(); size_t constant_begin_pos = 0; - for (size_t var_ix = 0; var_ix < entry.get_num_vars(); ++var_ix) { + for (size_t placeholder_ix = 0; placeholder_ix < entry.get_num_placeholders(); ++placeholder_ix) { ir::VariablePlaceholder var_placeholder; - size_t var_pos = entry.get_var_info(var_ix, var_placeholder); + size_t placeholder_pos = entry.get_placeholder_info(placeholder_ix, var_placeholder); // Add the constant that's between the last variable and this one, with newlines escaped - human_readable_value.append(value, constant_begin_pos, var_pos - constant_begin_pos); + human_readable_value.append(value, constant_begin_pos, placeholder_pos - constant_begin_pos); switch (var_placeholder) { case ir::VariablePlaceholder::Integer: @@ -83,13 +83,15 @@ int main (int argc, const char* argv[]) { case ir::VariablePlaceholder::Dictionary: human_readable_value += "\\d"; break; + case ir::VariablePlaceholder::Escape: + break; default: SPDLOG_ERROR("Logtype '{}' contains unexpected variable placeholder 0x{:x}", value, enum_to_underlying_type(var_placeholder)); return -1; } // Move past the variable placeholder - constant_begin_pos = var_pos + 1; + constant_begin_pos = placeholder_pos + 1; } // Append remainder of value, if any if (constant_begin_pos < value.length()) { diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index c95971204..97d9247d1 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -246,10 +246,10 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { // Test var_ids is correctly populated size_t encoded_var_id_ix = 0; ir::VariablePlaceholder var_placeholder; - for (auto var_ix = 0; var_ix < logtype_dict_entry.get_num_vars(); var_ix++) { - std::ignore = logtype_dict_entry.get_var_info(var_ix, var_placeholder); + for (auto placeholder_ix = 0; placeholder_ix < logtype_dict_entry.get_num_placeholders(); placeholder_ix++) { + std::ignore = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); if (ir::VariablePlaceholder::Dictionary == var_placeholder) { - auto var = encoded_vars[var_ix]; + auto var = encoded_vars[placeholder_ix]; REQUIRE(var_ids.size() > encoded_var_id_ix); REQUIRE(EncodedVariableInterpreter::decode_var_dict_id(var) == var_ids[encoded_var_id_ix]); From 5963bbd84f6fd476e44433beb84934c9fcd2efcd Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:44:09 -0400 Subject: [PATCH 12/30] Add missing const --- .../make_dictionaries_readable/make-dictionaries-readable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp index 4d992de32..c6c4d32e4 100644 --- a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -68,7 +68,7 @@ int main (int argc, const char* argv[]) { size_t constant_begin_pos = 0; for (size_t placeholder_ix = 0; placeholder_ix < entry.get_num_placeholders(); ++placeholder_ix) { ir::VariablePlaceholder var_placeholder; - size_t placeholder_pos = entry.get_placeholder_info(placeholder_ix, var_placeholder); + size_t const placeholder_pos = entry.get_placeholder_info(placeholder_ix, var_placeholder); // Add the constant that's between the last variable and this one, with newlines escaped human_readable_value.append(value, constant_begin_pos, placeholder_pos - constant_begin_pos); From 6199eec762f7a79e1f17eda7293faa0ca2650fc0 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 18 Sep 2023 16:22:04 -0400 Subject: [PATCH 13/30] Fix search --- components/core/src/Grep.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 3b2c22909..a1ae52efa 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -358,7 +358,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - logtype += query_token.get_value(); + ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -370,9 +370,6 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv logtype.append(processed_search_string, last_token_end_pos, string::npos); last_token_end_pos = processed_search_string.length(); } - std::string const uncleaned_logtype = logtype; - logtype.clear(); - ir::escape_and_append_constant_to_logtype(uncleaned_logtype, logtype); if ("*" == logtype) { // Logtype will match all messages From 40f02b0591127ff004a186c5b8e88306e7b7a9eb Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 18 Sep 2023 17:26:04 -0400 Subject: [PATCH 14/30] Track the number of escape placeholders when parsing the logtype in the dictionary entry. Add unittests --- .../core/src/LogTypeDictionaryEntry.cpp | 4 ++-- components/core/src/ir/parsing.cpp | 24 +++++++++++++++++++ components/core/src/ir/parsing.hpp | 16 +++++++++++++ .../tests/test-EncodedVariableInterpreter.cpp | 15 +++++++++++- 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 9c3b31314..03aeda970 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -57,7 +57,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable auto constant = std::string_view(msg).substr(last_var_end_pos, var_begin_pos - last_var_end_pos); - ir::escape_and_append_constant_to_logtype(constant, m_value); + m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; @@ -65,7 +65,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end auto constant = std::string_view(msg).substr(last_var_end_pos, msg.length() - last_var_end_pos); - ir::escape_and_append_constant_to_logtype(constant, m_value); + m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); } return false; diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 940252671..f09806345 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -101,4 +101,28 @@ void escape_and_append_constant_to_logtype(string_view constant, std::string& lo } logtype.append(constant, begin_pos, constant_len - begin_pos); } + +size_t escape_and_append_constant_to_logtype_with_tracking( + std::string_view constant, + std::string& logtype, + std::vector& escape_placeholder_positions +) { + size_t num_escape_placeholder_added = 0; + size_t begin_pos = 0; + auto constant_len = constant.length(); + for (size_t i = 0; i < constant_len; ++i) { + auto c = constant[i]; + if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { + logtype.append(constant, begin_pos, i - begin_pos); + escape_placeholder_positions.push_back(logtype.size()); + ++num_escape_placeholder_added; + logtype += ir::cVariablePlaceholderEscapeCharacter; + // NOTE: We don't need to append the character of interest + // immediately since the next constant copy operation will get it + begin_pos = i; + } + } + logtype.append(constant, begin_pos, constant_len - begin_pos); + return num_escape_placeholder_added; +} } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 2aa358623..f2a124aaf 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -11,6 +11,7 @@ */ #include +#include namespace ir { enum class VariablePlaceholder : char { @@ -87,6 +88,21 @@ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end * @param logtype */ void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); + +/** + * Appends the given constant to the logtype, escaping any variable placeholders + * and append the position of escaped positions + * @param constant + * @param logtype + * @param escape_placeholder_positions The vector to append the positions of the + * added escape placeholders + * @return Total number of escape placeholders added + */ +[[maybe_unused]] size_t escape_and_append_constant_to_logtype_with_tracking( + std::string_view constant, + std::string& logtype, + std::vector& escape_placeholder_positions +); } // namespace ir #endif // IR_PARSING_HPP diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 97d9247d1..239b91654 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -6,6 +6,7 @@ // Project headers #include "../src/EncodedVariableInterpreter.hpp" +#include "../src/ir/parsing.hpp" #include "../src/streaming_archive/Constants.hpp" using std::string; @@ -235,7 +236,10 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { " and a very large int " + var_strs[1] + " and a double " + var_strs[2] + " and a weird double " + var_strs[3] + - " and a str with numbers " + var_strs[4]; + " and a str with numbers " + var_strs[4] + + " and an int placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Integer) + + " and a float placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Float) + + " and a dictionary placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); LogTypeDictionaryEntry logtype_dict_entry; EncodedVariableInterpreter::encode_and_add_to_dictionary(msg, logtype_dict_entry, @@ -275,6 +279,15 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[3], var_dict_reader, false, search_logtype, sub_query)); search_logtype += " and a str with numbers "; REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[4], var_dict_reader, false, search_logtype, sub_query)); + search_logtype += " and an int placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + search_logtype += " and a float placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); + search_logtype += " and a dictionary placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); auto& vars = sub_query.get_vars(); REQUIRE(vars.size() == encoded_vars.size()); for (size_t i = 0; i < vars.size(); ++i) { From a72d52f74c14d6ed0e935f347c7e75e4ee21b302 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 19 Sep 2023 00:05:28 -0400 Subject: [PATCH 15/30] Don't unescape logtypes when ingesting IR log events. --- .../core/src/EncodedVariableInterpreter.cpp | 2 +- .../src/ffi/ir_stream/decoding_methods.cpp | 2 +- .../src/ffi/ir_stream/decoding_methods.hpp | 3 +++ .../src/ffi/ir_stream/decoding_methods.inc | 18 +++++++++++------- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index 40326d994..486693c17 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -273,7 +273,7 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( encoded_vars.push_back(encoded_var); }; - ffi::ir_stream::generic_decode_message( + ffi::ir_stream::generic_decode_message( log_event.get_logtype(), log_event.get_encoded_vars(), log_event.get_dict_vars(), diff --git a/components/core/src/ffi/ir_stream/decoding_methods.cpp b/components/core/src/ffi/ir_stream/decoding_methods.cpp index 167fc77cd..c1c7aac35 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/decoding_methods.cpp @@ -288,7 +288,7 @@ generic_decode_next_message(ReaderInterface& reader, string& message, epoch_time auto dict_var_handler = [&](string const& dict_var) { message.append(dict_var); }; try { - generic_decode_message( + generic_decode_message( logtype, encoded_vars, dict_vars, diff --git a/components/core/src/ffi/ir_stream/decoding_methods.hpp b/components/core/src/ffi/ir_stream/decoding_methods.hpp index dcf5c207d..b715d2b5c 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.hpp +++ b/components/core/src/ffi/ir_stream/decoding_methods.hpp @@ -74,6 +74,8 @@ auto deserialize_ir_message( /** * Decodes the IR message calls the given methods to handle each component of * the message + * @tparam unescape_logtype Whether to remove the escape characters from the + * logtype before calling \p ConstantHandler * @tparam encoded_variable_t Type of the encoded variable * @tparam ConstantHandler Method to handle constants in the logtype. * Signature: (const std::string&, size_t, size_t) -> void @@ -93,6 +95,7 @@ auto deserialize_ir_message( * @throw DecodingException if the message can not be decoded properly */ template < + bool unescape_logtype, typename encoded_variable_t, typename ConstantHandler, typename EncodedIntHandler, diff --git a/components/core/src/ffi/ir_stream/decoding_methods.inc b/components/core/src/ffi/ir_stream/decoding_methods.inc index 139688388..5964a95a9 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.inc +++ b/components/core/src/ffi/ir_stream/decoding_methods.inc @@ -11,6 +11,7 @@ namespace ffi::ir_stream { template < + bool unescape_logtype, typename encoded_variable_t, typename ConstantHandler, typename EncodedIntHandler, @@ -109,14 +110,17 @@ void generic_decode_message( cUnexpectedEscapeCharacterMessage ); } - constant_handler( - logtype, - next_static_text_begin_pos, - cur_pos - next_static_text_begin_pos - ); - // Skip the escape character - next_static_text_begin_pos = cur_pos + 1; + if constexpr (unescape_logtype) { + constant_handler( + logtype, + next_static_text_begin_pos, + cur_pos - next_static_text_begin_pos + ); + + // Skip the escape character + next_static_text_begin_pos = cur_pos + 1; + } // The character after the escape character is static text // (regardless of whether it is a variable placeholder), so // increment cur_pos by 1 to ensure we don't process the From 251b8a7732edd2d421d80612ed00459121f4852e Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Sat, 23 Sep 2023 16:40:35 -0700 Subject: [PATCH 16/30] Double escape when building the search logtype --- components/core/CMakeLists.txt | 4 +++ components/core/src/Grep.cpp | 12 ++++++-- components/core/src/ir/parsing.cpp | 37 ++++------------------ components/core/src/ir/parsing.hpp | 35 ++++++++++++++++----- components/core/src/ir/parsing.inc | 46 ++++++++++++++++++++++++++++ components/core/src/string_utils.cpp | 2 ++ 6 files changed, 95 insertions(+), 41 deletions(-) create mode 100644 components/core/src/ir/parsing.inc diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 7dbe74bf5..503d8f122 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -255,6 +255,7 @@ set(SOURCE_FILES_clp src/ir/LogEventDeserializer.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp @@ -438,6 +439,7 @@ set(SOURCE_FILES_clg src/ir/LogEvent.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp @@ -596,6 +598,7 @@ set(SOURCE_FILES_clo src/ir/LogEvent.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp @@ -799,6 +802,7 @@ set(SOURCE_FILES_unitTest src/ir/LogEventDeserializer.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index a1ae52efa..273fb24b7 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -340,7 +340,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv string logtype; for (const auto& query_token : query_tokens) { // Append from end of last token to beginning of this token, to logtype - logtype.append(processed_search_string, last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos); + ir::escape_and_append_constant_to_logtype( + static_cast(processed_search_string).substr(last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos), + logtype + ); last_token_end_pos = query_token.get_end_pos(); if (query_token.is_wildcard()) { @@ -358,7 +361,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype); + ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -367,7 +370,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv if (last_token_end_pos < processed_search_string.length()) { // Append from end of last token to end - logtype.append(processed_search_string, last_token_end_pos, string::npos); + ir::escape_and_append_constant_to_logtype( + static_cast(processed_search_string).substr(last_token_end_pos, string::npos), + logtype + ); last_token_end_pos = processed_search_string.length(); } diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index f09806345..625b6bc12 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -86,43 +86,18 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en return (msg_length != begin_pos); } -void escape_and_append_constant_to_logtype(string_view constant, std::string& logtype) { - size_t begin_pos = 0; - auto constant_len = constant.length(); - for (size_t i = 0; i < constant_len; ++i) { - auto c = constant[i]; - if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { - logtype.append(constant, begin_pos, i - begin_pos); - logtype += ir::cVariablePlaceholderEscapeCharacter; - // NOTE: We don't need to append the character of interest - // immediately since the next constant copy operation will get it - begin_pos = i; - } - } - logtype.append(constant, begin_pos, constant_len - begin_pos); -} - size_t escape_and_append_constant_to_logtype_with_tracking( std::string_view constant, std::string& logtype, std::vector& escape_placeholder_positions ) { size_t num_escape_placeholder_added = 0; - size_t begin_pos = 0; - auto constant_len = constant.length(); - for (size_t i = 0; i < constant_len; ++i) { - auto c = constant[i]; - if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { - logtype.append(constant, begin_pos, i - begin_pos); - escape_placeholder_positions.push_back(logtype.size()); - ++num_escape_placeholder_added; - logtype += ir::cVariablePlaceholderEscapeCharacter; - // NOTE: We don't need to append the character of interest - // immediately since the next constant copy operation will get it - begin_pos = i; - } - } - logtype.append(constant, begin_pos, constant_len - begin_pos); + auto escape_handler = [&](std::string& logtype) { + escape_placeholder_positions.push_back(logtype.size()); + ++num_escape_placeholder_added; + logtype += ir::cVariablePlaceholderEscapeCharacter; + }; + append_constant_to_logtype(constant, logtype, escape_handler); return num_escape_placeholder_added; } } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index f2a124aaf..0fc5463cd 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -84,25 +84,46 @@ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end /** * Appends the given constant to the logtype, escaping any variable placeholders + * and track the position of escape characters appended. * @param constant * @param logtype + * @param escape_placeholder_positions The vector to append the positions of the + * added escape placeholders + * @return Total number of escape placeholders added */ -void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); +[[maybe_unused]] size_t escape_and_append_constant_to_logtype_with_tracking( + std::string_view constant, + std::string& logtype, + std::vector& escape_placeholder_positions +); + /** * Appends the given constant to the logtype, escaping any variable placeholders - * and append the position of escaped positions + * @tparam double_escape Whether to escape the variable placeholders twice. This + * should be set to true when building a logtype for wildcard search. * @param constant * @param logtype - * @param escape_placeholder_positions The vector to append the positions of the - * added escape placeholders - * @return Total number of escape placeholders added */ -[[maybe_unused]] size_t escape_and_append_constant_to_logtype_with_tracking( +template +void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); + +/** + * Appends the given constant to the logtype, escaping any variable placeholders + * by using the escape handler. + * @tparam double_escape Whether to escape the variable placeholders twice. + * @tparam EscapeHandler Method to append and track escape chars when escaping + * variable placeholders. Signature: (std::string& logtype) + * @param constant + * @param logtype +*/ +template +void append_constant_to_logtype( std::string_view constant, std::string& logtype, - std::vector& escape_placeholder_positions + EscapeHandler escape_handler ); } // namespace ir +#include "parsing.inc" #endif // IR_PARSING_HPP diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc new file mode 100644 index 000000000..a22a87d41 --- /dev/null +++ b/components/core/src/ir/parsing.inc @@ -0,0 +1,46 @@ +#ifndef IR_PARSING_INC +#define IR_PARSING_INC + +#include +#include + +namespace ir { +template +void append_constant_to_logtype( + std::string_view constant, + std::string& logtype, + EscapeHandler escape_handler +) { + size_t begin_pos = 0; + auto constant_len = constant.length(); + for (size_t i = 0; i < constant_len; ++i) { + auto const c = constant[i]; + bool const is_escape_char = (cVariablePlaceholderEscapeCharacter == c); + if (is_escape_char || is_variable_placeholder(c)) { + logtype.append(constant, begin_pos, i - begin_pos); + escape_handler(logtype); + if constexpr (double_escape) { + if (false == is_escape_char) { + escape_handler(logtype); + } + } + // NOTE: We don't need to append the character of interest + // immediately since the next constant copy operation will get it + begin_pos = i; + } + } + logtype.append(constant, begin_pos, constant_len - begin_pos); +} + +template +void escape_and_append_constant_to_logtype( + std::string_view constant, + std::string& logtype +) { + auto escape_handler = [&](std::string& logtype) { + logtype += ir::cVariablePlaceholderEscapeCharacter; + }; + append_constant_to_logtype(constant, logtype, escape_handler); +} +} +#endif diff --git a/components/core/src/string_utils.cpp b/components/core/src/string_utils.cpp index 68e3e1d31..556193d3a 100644 --- a/components/core/src/string_utils.cpp +++ b/components/core/src/string_utils.cpp @@ -4,6 +4,8 @@ #include #include +#include "ir/parsing.hpp" + using std::string; using std::string_view; From b61e3245a212b84a7b6f402092e57462fb93a3d3 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Sun, 24 Sep 2023 02:48:38 -0700 Subject: [PATCH 17/30] Refactor the code to reduce the inner branching --- components/core/src/ir/parsing.hpp | 3 +-- components/core/src/ir/parsing.inc | 34 ++++++++++++++---------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 0fc5463cd..61aa62178 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -97,7 +97,6 @@ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end std::vector& escape_placeholder_positions ); - /** * Appends the given constant to the logtype, escaping any variable placeholders * @tparam double_escape Whether to escape the variable placeholders twice. This @@ -116,7 +115,7 @@ void escape_and_append_constant_to_logtype(std::string_view constant, std::strin * variable placeholders. Signature: (std::string& logtype) * @param constant * @param logtype -*/ + */ template void append_constant_to_logtype( std::string_view constant, diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc index a22a87d41..55fef6b7b 100644 --- a/components/core/src/ir/parsing.inc +++ b/components/core/src/ir/parsing.inc @@ -16,31 +16,29 @@ void append_constant_to_logtype( for (size_t i = 0; i < constant_len; ++i) { auto const c = constant[i]; bool const is_escape_char = (cVariablePlaceholderEscapeCharacter == c); - if (is_escape_char || is_variable_placeholder(c)) { - logtype.append(constant, begin_pos, i - begin_pos); + if (false == is_escape_char && false == is_variable_placeholder(c)) { + continue; + } + logtype.append(constant, begin_pos, i - begin_pos); + // NOTE: We don't need to append the character of interest + // immediately since the next constant copy operation will get it + begin_pos = i; + escape_handler(logtype); + if constexpr (false == double_escape) { + continue; + } + if (false == is_escape_char) { escape_handler(logtype); - if constexpr (double_escape) { - if (false == is_escape_char) { - escape_handler(logtype); - } - } - // NOTE: We don't need to append the character of interest - // immediately since the next constant copy operation will get it - begin_pos = i; } } logtype.append(constant, begin_pos, constant_len - begin_pos); } template -void escape_and_append_constant_to_logtype( - std::string_view constant, - std::string& logtype -) { - auto escape_handler = [&](std::string& logtype) { - logtype += ir::cVariablePlaceholderEscapeCharacter; - }; +void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype) { + auto escape_handler + = [&](std::string& logtype) { logtype += ir::cVariablePlaceholderEscapeCharacter; }; append_constant_to_logtype(constant, logtype, escape_handler); } -} +} // namespace ir #endif From dde3dfb63eb8ba95d3ec3781fd6b1d31b6480b24 Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:20:14 -0500 Subject: [PATCH 18/30] Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- .../core/src/EncodedVariableInterpreter.cpp | 2 +- components/core/src/LogTypeDictionaryEntry.cpp | 10 ++++++++-- components/core/src/LogTypeDictionaryEntry.hpp | 15 +++++++-------- components/core/src/ir/parsing.hpp | 2 +- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index 486693c17..5c1994061 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -305,7 +305,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; ++placeholder_ix) { size_t placeholder_position = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); - // Add the constant that's between the last variable and this one + // Add the constant that's between the last placeholder and this one decompressed_msg.append(logtype_value, constant_begin_pos, placeholder_position - constant_begin_pos); switch (var_placeholder) { diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 03aeda970..1e4e2d4e6 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -56,7 +56,10 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi auto last_var_end_pos = var_end_pos; if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - auto constant = std::string_view(msg).substr(last_var_end_pos, var_begin_pos - last_var_end_pos); + auto constant = static_cast(msg).substr( + last_var_end_pos, + var_begin_pos - last_var_end_pos + ); m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); @@ -64,7 +67,10 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - auto constant = std::string_view(msg).substr(last_var_end_pos, msg.length() - last_var_end_pos); + auto constant = static_cast(msg).substr( + last_var_end_pos, + msg.length() - last_var_end_pos + ); m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); } diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index 25168f9d8..8e8db66ae 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -64,21 +64,20 @@ class LogTypeDictionaryEntry : public DictionaryEntry { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); } /** - * Adds an escape variable placeholder to the given logtype + * Adds an escape character to the given logtype * @param logtype */ - static void add_escape_var (std::string& logtype) { + static void add_escape (std::string& logtype) { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); } /** - * @return The total number of placeholders on record. The placeholders may - * represent an escape characters. + * @return The number of variable placeholders (included escaped ones) in the logtype. */ size_t get_num_placeholders () const { return m_placeholder_positions.size(); } /** - * @return The total number of variables on record. + * @return The number of variable placeholders (excluding escaped ones) in the logtype. */ size_t get_num_variables () const { return m_placeholder_positions.size() - m_num_escape_placeholders; @@ -118,9 +117,9 @@ class LogTypeDictionaryEntry : public DictionaryEntry { */ void add_dictionary_var (); /** - * Adds an escape variable placeholder + * Adds an escape character */ - void add_escape_var (); + void add_escape (); /** * Parses next variable from a message, constructing the constant part of the message's logtype as well @@ -162,7 +161,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { private: // Variables std::vector m_placeholder_positions; - size_t m_num_escape_placeholders; + size_t m_num_escaped_placeholders; }; #endif // LOGTYPEDICTIONARYENTRY_HPP diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 61aa62178..3b0820dc1 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -18,7 +18,7 @@ enum class VariablePlaceholder : char { Integer = 0x11, Dictionary = 0x12, Float = 0x13, - Escape = 0x5c, + Escape = '\\', }; constexpr char cVariablePlaceholderEscapeCharacter = '\\'; From 0fe414de58f7c42926218cb87b10515a433dee4b Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:28:52 -0500 Subject: [PATCH 19/30] Undo parsing.hpp --- components/core/src/string_utils.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/core/src/string_utils.cpp b/components/core/src/string_utils.cpp index 556193d3a..68e3e1d31 100644 --- a/components/core/src/string_utils.cpp +++ b/components/core/src/string_utils.cpp @@ -4,8 +4,6 @@ #include #include -#include "ir/parsing.hpp" - using std::string; using std::string_view; From 37eba2cf6b7cc4ec34ef69487a329f569d8ef83f Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:35:18 -0500 Subject: [PATCH 20/30] Rename escape_var --- components/core/src/LogTypeDictionaryEntry.cpp | 14 +++++++------- components/core/src/LogTypeDictionaryEntry.hpp | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 1e4e2d4e6..2540e8e23 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -46,10 +46,10 @@ void LogTypeDictionaryEntry::add_float_var () { add_float_var(m_value); } -void LogTypeDictionaryEntry::add_escape_var() { +void LogTypeDictionaryEntry::add_escape() { m_placeholder_positions.push_back(m_value.length()); - add_escape_var(m_value); - ++m_num_escape_placeholders; + add_escape(m_value); + ++m_num_escaped_placeholders; } bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { @@ -60,7 +60,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi last_var_end_pos, var_begin_pos - last_var_end_pos ); - m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); + m_num_escaped_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; @@ -71,7 +71,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi last_var_end_pos, msg.length() - last_var_end_pos ); - m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); + m_num_escaped_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); } return false; @@ -80,7 +80,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi void LogTypeDictionaryEntry::clear () { m_value.clear(); m_placeholder_positions.clear(); - m_num_escape_placeholders = 0; + m_num_escaped_placeholders = 0; } void LogTypeDictionaryEntry::write_to_file (streaming_compression::Compressor& compressor) const { @@ -124,7 +124,7 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec is_escaped = true; add_constant(constant, 0, constant.length()); constant.clear(); - add_escape_var(); + add_escape(); } else { if (enum_to_underlying_type(ir::VariablePlaceholder::Integer) == c) { add_constant(constant, 0, constant.length()); diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index 8e8db66ae..a3001eb30 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -33,7 +33,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { }; // Constructors - LogTypeDictionaryEntry () : m_num_escape_placeholders{0} {}; + LogTypeDictionaryEntry () : m_num_escaped_placeholders{0} {}; // Use default copy constructor LogTypeDictionaryEntry (const LogTypeDictionaryEntry&) = default; @@ -80,7 +80,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * @return The number of variable placeholders (excluding escaped ones) in the logtype. */ size_t get_num_variables () const { - return m_placeholder_positions.size() - m_num_escape_placeholders; + return m_placeholder_positions.size() - m_num_escaped_placeholders; } /** From eb8e9cbd78d4c444609edf14f9a22e6b84a69d84 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:37:57 -0500 Subject: [PATCH 21/30] Refactor member variable init --- components/core/src/LogTypeDictionaryEntry.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index a3001eb30..b47f7009f 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -33,7 +33,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { }; // Constructors - LogTypeDictionaryEntry () : m_num_escaped_placeholders{0} {}; + LogTypeDictionaryEntry () = default; // Use default copy constructor LogTypeDictionaryEntry (const LogTypeDictionaryEntry&) = default; @@ -161,7 +161,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { private: // Variables std::vector m_placeholder_positions; - size_t m_num_escaped_placeholders; + size_t m_num_escaped_placeholders{0}; }; #endif // LOGTYPEDICTIONARYENTRY_HPP From d44862d5c3e6ed9d70f55abc6c532a9a06080dd9 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:39:31 -0500 Subject: [PATCH 22/30] Refactoring the place to do the variabel initializations --- components/core/src/EncodedVariableInterpreter.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index 5c1994061..e8b681708 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -287,11 +287,9 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDictionaryEntry& logtype_dict_entry, const VariableDictionaryReader& var_dict, const vector& encoded_vars, string& decompressed_msg) { - size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); - size_t const num_vars = logtype_dict_entry.get_num_variables(); - // Ensure the number of variables in the logtype matches the number of encoded variables given const auto& logtype_value = logtype_dict_entry.get_value(); + size_t const num_vars = logtype_dict_entry.get_num_variables(); if (num_vars != encoded_vars.size()) { SPDLOG_ERROR("EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given for decoding.", logtype_value.c_str(), num_vars, encoded_vars.size()); @@ -302,6 +300,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic size_t constant_begin_pos = 0; string float_str; variable_dictionary_id_t var_dict_id; + size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; ++placeholder_ix) { size_t placeholder_position = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); From 0ab0024a5ac72cd3e20ff8bce62abf938e7bba17 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:49:52 -0500 Subject: [PATCH 23/30] Fix the variable placeholder --- components/core/src/LogTypeDictionaryEntry.cpp | 2 +- components/core/src/ffi/ir_stream/decoding_methods.inc | 2 +- components/core/src/ir/parsing.cpp | 2 +- components/core/src/ir/parsing.hpp | 2 -- components/core/tests/test-ir_encoding_methods.cpp | 3 ++- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 2540e8e23..729bbcff0 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -120,7 +120,7 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec if (is_escaped) { constant += c; is_escaped = false; - } else if (ir::cVariablePlaceholderEscapeCharacter == c) { + } else if (enum_to_underlying_type(ir::VariablePlaceholder::Escape) == c) { is_escaped = true; add_constant(constant, 0, constant.length()); constant.clear(); diff --git a/components/core/src/ffi/ir_stream/decoding_methods.inc b/components/core/src/ffi/ir_stream/decoding_methods.inc index 5964a95a9..cad66f901 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.inc +++ b/components/core/src/ffi/ir_stream/decoding_methods.inc @@ -99,7 +99,7 @@ void generic_decode_message( break; } - case ir::cVariablePlaceholderEscapeCharacter: { + case enum_to_underlying_type(ir::VariablePlaceholder::Escape): { // Ensure the escape character is followed by a // character that's being escaped if (cur_pos == logtype_length - 1) { diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 625b6bc12..f961b5941 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -95,7 +95,7 @@ size_t escape_and_append_constant_to_logtype_with_tracking( auto escape_handler = [&](std::string& logtype) { escape_placeholder_positions.push_back(logtype.size()); ++num_escape_placeholder_added; - logtype += ir::cVariablePlaceholderEscapeCharacter; + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); }; append_constant_to_logtype(constant, logtype, escape_handler); return num_escape_placeholder_added; diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 3b0820dc1..368ef6fc0 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -21,8 +21,6 @@ enum class VariablePlaceholder : char { Escape = '\\', }; -constexpr char cVariablePlaceholderEscapeCharacter = '\\'; - /** * Checks if the given character is a delimiter * We treat everything *except* the following quoted characters as a diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index c4d586d3e..b168d315e 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -443,7 +443,8 @@ TEST_CASE("message_decode_error", "[ffi][decode_next_message]") { // Test if a trailing escape triggers a decoder error auto ir_with_extra_escape{ir_buf}; - ir_with_extra_escape.at(logtype_end_pos - 1) = ir::cVariablePlaceholderEscapeCharacter; + ir_with_extra_escape.at(logtype_end_pos - 1) + = enum_to_underlying_type(VariablePlaceholder::Escape); BufferReader ir_with_extra_escape_buffer{ size_checked_pointer_cast(ir_with_extra_escape.data()), ir_with_extra_escape.size()}; From 8c77fbb19980d6d0f5a5d042e39b7167c45bcd58 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 18:11:53 -0500 Subject: [PATCH 24/30] Removing cVariablePlaceholder --- components/core/src/ir/parsing.inc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc index 55fef6b7b..14f3c5adc 100644 --- a/components/core/src/ir/parsing.inc +++ b/components/core/src/ir/parsing.inc @@ -4,6 +4,8 @@ #include #include +#include "../type_utils.hpp" + namespace ir { template void append_constant_to_logtype( @@ -15,7 +17,7 @@ void append_constant_to_logtype( auto constant_len = constant.length(); for (size_t i = 0; i < constant_len; ++i) { auto const c = constant[i]; - bool const is_escape_char = (cVariablePlaceholderEscapeCharacter == c); + bool const is_escape_char = (enum_to_underlying_type(VariablePlaceholder::Escape) == c); if (false == is_escape_char && false == is_variable_placeholder(c)) { continue; } @@ -36,8 +38,9 @@ void append_constant_to_logtype( template void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype) { - auto escape_handler - = [&](std::string& logtype) { logtype += ir::cVariablePlaceholderEscapeCharacter; }; + auto escape_handler = [&](std::string& logtype) { + logtype += enum_to_underlying_type(VariablePlaceholder::Escape); + }; append_constant_to_logtype(constant, logtype, escape_handler); } } // namespace ir From bf95c9a21bd6e74c5842356ad7b84dd7b981a2cb Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 18:53:04 -0500 Subject: [PATCH 25/30] Adding a case to the unit test --- components/core/tests/test-EncodedVariableInterpreter.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index 239b91654..5e494d022 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -237,6 +237,7 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { " and a double " + var_strs[2] + " and a weird double " + var_strs[3] + " and a str with numbers " + var_strs[4] + + " and an escape " + enum_to_underlying_type(ir::VariablePlaceholder::Escape) + " and an int placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Integer) + " and a float placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Float) + " and a dictionary placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); @@ -279,6 +280,9 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[3], var_dict_reader, false, search_logtype, sub_query)); search_logtype += " and a str with numbers "; REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[4], var_dict_reader, false, search_logtype, sub_query)); + search_logtype += " and an escape "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); search_logtype += " and an int placeholder "; search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); @@ -295,7 +299,7 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { } // Test search for unknown variable - REQUIRE(!EncodedVariableInterpreter::encode_and_search_dictionary("abc123", var_dict_reader, false, search_logtype, sub_query)); + REQUIRE(false == EncodedVariableInterpreter::encode_and_search_dictionary("abc123", var_dict_reader, false, search_logtype, sub_query)); REQUIRE(logtype_dict_entry.get_value() == search_logtype); From 469c2b31aba7659ef13356800d66cd42321932c1 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 19:54:45 -0500 Subject: [PATCH 26/30] Refactor the escape function --- components/core/src/Grep.cpp | 19 ++++++++--- .../core/src/LogTypeDictionaryEntry.cpp | 9 +++-- .../src/ffi/ir_stream/encoding_methods.cpp | 5 ++- components/core/src/ir/parsing.cpp | 15 -------- components/core/src/ir/parsing.hpp | 34 +++---------------- components/core/src/ir/parsing.inc | 20 ++--------- 6 files changed, 33 insertions(+), 69 deletions(-) diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 273fb24b7..ef4505be3 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -338,11 +338,19 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv { size_t last_token_end_pos = 0; string logtype; + auto double_escape_handler = [](std::string& logtype, char char_to_escape) -> void { + auto const escape_char{enum_to_underlying_type(ir::VariablePlaceholder::Escape)}; + logtype += escape_char; + if (escape_char != char_to_escape) { + logtype += escape_char; + } + }; for (const auto& query_token : query_tokens) { // Append from end of last token to beginning of this token, to logtype - ir::escape_and_append_constant_to_logtype( + ir::escape_and_append_constant_to_logtype( static_cast(processed_search_string).substr(last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos), - logtype + logtype, + double_escape_handler ); last_token_end_pos = query_token.get_end_pos(); @@ -361,7 +369,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype); + ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype, double_escape_handler); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -370,9 +378,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv if (last_token_end_pos < processed_search_string.length()) { // Append from end of last token to end - ir::escape_and_append_constant_to_logtype( + ir::escape_and_append_constant_to_logtype( static_cast(processed_search_string).substr(last_token_end_pos, string::npos), - logtype + logtype, + double_escape_handler ); last_token_end_pos = processed_search_string.length(); } diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 729bbcff0..9adc4e4c4 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -54,13 +54,18 @@ void LogTypeDictionaryEntry::add_escape() { bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; + auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { + m_placeholder_positions.push_back(logtype.size()); + ++m_num_escaped_placeholders; + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + }; if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable auto constant = static_cast(msg).substr( last_var_end_pos, var_begin_pos - last_var_end_pos ); - m_num_escaped_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); + ir::escape_and_append_constant_to_logtype(constant, m_value, escape_handler); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; @@ -71,7 +76,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi last_var_end_pos, msg.length() - last_var_end_pos ); - m_num_escaped_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions); + ir::escape_and_append_constant_to_logtype(constant, m_value, escape_handler); } return false; diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index ec427ab76..ba795f69d 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -166,7 +166,10 @@ static void add_base_metadata_fields( } static bool append_constant_to_logtype(string_view constant, string& logtype) { - ir::escape_and_append_constant_to_logtype(constant, logtype); + auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + }; + ir::escape_and_append_constant_to_logtype(constant, logtype, escape_handler); return true; } diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index f961b5941..6af750c62 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -85,19 +85,4 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en return (msg_length != begin_pos); } - -size_t escape_and_append_constant_to_logtype_with_tracking( - std::string_view constant, - std::string& logtype, - std::vector& escape_placeholder_positions -) { - size_t num_escape_placeholder_added = 0; - auto escape_handler = [&](std::string& logtype) { - escape_placeholder_positions.push_back(logtype.size()); - ++num_escape_placeholder_added; - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); - }; - append_constant_to_logtype(constant, logtype, escape_handler); - return num_escape_placeholder_added; -} } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 368ef6fc0..ba318a56c 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -80,42 +80,18 @@ bool is_var(std::string_view value); */ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end_pos); -/** - * Appends the given constant to the logtype, escaping any variable placeholders - * and track the position of escape characters appended. - * @param constant - * @param logtype - * @param escape_placeholder_positions The vector to append the positions of the - * added escape placeholders - * @return Total number of escape placeholders added - */ -[[maybe_unused]] size_t escape_and_append_constant_to_logtype_with_tracking( - std::string_view constant, - std::string& logtype, - std::vector& escape_placeholder_positions -); - -/** - * Appends the given constant to the logtype, escaping any variable placeholders - * @tparam double_escape Whether to escape the variable placeholders twice. This - * should be set to true when building a logtype for wildcard search. - * @param constant - * @param logtype - */ -template -void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); - /** * Appends the given constant to the logtype, escaping any variable placeholders * by using the escape handler. - * @tparam double_escape Whether to escape the variable placeholders twice. * @tparam EscapeHandler Method to append and track escape chars when escaping - * variable placeholders. Signature: (std::string& logtype) + * variable placeholders. Signature: + * (std::string& logtype, [[maybe_unused]] char char_to_escape) -> void * @param constant * @param logtype + * @param escape_handler */ -template -void append_constant_to_logtype( +template +void escape_and_append_constant_to_logtype( std::string_view constant, std::string& logtype, EscapeHandler escape_handler diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc index 14f3c5adc..bbcece639 100644 --- a/components/core/src/ir/parsing.inc +++ b/components/core/src/ir/parsing.inc @@ -7,8 +7,8 @@ #include "../type_utils.hpp" namespace ir { -template -void append_constant_to_logtype( +template +void escape_and_append_constant_to_logtype( std::string_view constant, std::string& logtype, EscapeHandler escape_handler @@ -25,23 +25,9 @@ void append_constant_to_logtype( // NOTE: We don't need to append the character of interest // immediately since the next constant copy operation will get it begin_pos = i; - escape_handler(logtype); - if constexpr (false == double_escape) { - continue; - } - if (false == is_escape_char) { - escape_handler(logtype); - } + escape_handler(logtype, c); } logtype.append(constant, begin_pos, constant_len - begin_pos); } - -template -void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype) { - auto escape_handler = [&](std::string& logtype) { - logtype += enum_to_underlying_type(VariablePlaceholder::Escape); - }; - append_constant_to_logtype(constant, logtype, escape_handler); -} } // namespace ir #endif From 62363ecc420046a059d056f3f5b3c8735e47e9fb Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:36:28 -0500 Subject: [PATCH 27/30] Fix subquery --- components/core/src/ffi/search/Subquery.cpp | 33 +++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/components/core/src/ffi/search/Subquery.cpp b/components/core/src/ffi/search/Subquery.cpp index 448521c4a..ac7e3d3f7 100644 --- a/components/core/src/ffi/search/Subquery.cpp +++ b/components/core/src/ffi/search/Subquery.cpp @@ -1,5 +1,6 @@ #include "Subquery.hpp" +#include "../../ir/parsing.hpp" #include "QueryWildcard.hpp" using std::string; @@ -14,18 +15,44 @@ Subquery::Subquery(string logtype_query, Subquery::QueryVari m_query_vars(variables) { // Determine if the query contains variables bool is_escaped = false; - for (auto const c : m_logtype_query) { + auto const logtype_query_length{m_logtype_query.size()}; + std::vector escaped_placeholder_positions; + escaped_placeholder_positions.reserve(logtype_query_length / 2); + auto const escape_char{enum_to_underlying_type(ir::VariablePlaceholder::Escape)}; + for (size_t idx = 0; idx < logtype_query_length; ++idx) { + char const c{m_logtype_query[idx]}; if (is_escaped) { is_escaped = false; - } else if ('\\' == c) { + if (escape_char == c) { + continue; + } + escaped_placeholder_positions.push_back(idx); + } else if (escape_char == c) { is_escaped = true; } else if ((enum_to_underlying_type(WildcardType::ZeroOrMoreChars) == c || enum_to_underlying_type(WildcardType::AnyChar) == c)) { m_logtype_query_contains_wildcards = true; - break; } } + if (false == m_logtype_query_contains_wildcards) { + return; + } + if (escaped_placeholder_positions.empty()) { + return; + } + std::string double_escaped_logtype_query; + size_t curr_pos{0}; + for (auto const boundary : escaped_placeholder_positions) { + double_escaped_logtype_query.append(m_logtype_query, curr_pos, boundary - curr_pos); + double_escaped_logtype_query += escape_char; + curr_pos = boundary; + } + if (logtype_query_length != curr_pos) { + double_escaped_logtype_query + .append(m_logtype_query, curr_pos, logtype_query_length - curr_pos); + } + m_logtype_query = std::move(double_escaped_logtype_query); } // Explicitly declare specializations to avoid having to validate that the From 7c3189f1b3b806898e00472964241fa8dacc9de7 Mon Sep 17 00:00:00 2001 From: LittleStar <59785146+LinZhihao-723@users.noreply.github.com> Date: Thu, 30 Nov 2023 16:58:54 -0500 Subject: [PATCH 28/30] Fix ffi and search --- components/core/src/Grep.cpp | 6 +- .../core/src/LogTypeDictionaryEntry.cpp | 4 +- components/core/src/ffi/encoding_methods.hpp | 2 +- components/core/src/ffi/encoding_methods.inc | 24 +--- .../core/src/ffi/ir_stream/Attribute.hpp | 22 +++ .../src/ffi/ir_stream/encoding_methods.cpp | 23 +--- components/core/src/ffi/search/Subquery.cpp | 14 +- .../core/src/ffi/search/query_methods.cpp | 11 +- components/core/src/ir/parsing.cpp | 7 + components/core/src/ir/parsing.hpp | 9 +- components/core/src/ir/parsing.inc | 2 +- .../core/tests/test-encoding_methods.cpp | 4 +- components/core/tests/test-query_methods.cpp | 128 ++++++++++++++++++ 13 files changed, 194 insertions(+), 62 deletions(-) create mode 100644 components/core/src/ffi/ir_stream/Attribute.hpp diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index ef4505be3..e2ce5b7b4 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -347,7 +347,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv }; for (const auto& query_token : query_tokens) { // Append from end of last token to beginning of this token, to logtype - ir::escape_and_append_constant_to_logtype( + ir::append_constant_to_logtype( static_cast(processed_search_string).substr(last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos), logtype, double_escape_handler @@ -369,7 +369,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - ir::escape_and_append_constant_to_logtype(query_token.get_value(), logtype, double_escape_handler); + ir::append_constant_to_logtype(query_token.get_value(), logtype, double_escape_handler); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -378,7 +378,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv if (last_token_end_pos < processed_search_string.length()) { // Append from end of last token to end - ir::escape_and_append_constant_to_logtype( + ir::append_constant_to_logtype( static_cast(processed_search_string).substr(last_token_end_pos, string::npos), logtype, double_escape_handler diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 9adc4e4c4..b73f47715 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -65,7 +65,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi last_var_end_pos, var_begin_pos - last_var_end_pos ); - ir::escape_and_append_constant_to_logtype(constant, m_value, escape_handler); + ir::append_constant_to_logtype(constant, m_value, escape_handler); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; @@ -76,7 +76,7 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi last_var_end_pos, msg.length() - last_var_end_pos ); - ir::escape_and_append_constant_to_logtype(constant, m_value, escape_handler); + ir::append_constant_to_logtype(constant, m_value, escape_handler); } return false; diff --git a/components/core/src/ffi/encoding_methods.hpp b/components/core/src/ffi/encoding_methods.hpp index ec4a8ec37..35f603ce9 100644 --- a/components/core/src/ffi/encoding_methods.hpp +++ b/components/core/src/ffi/encoding_methods.hpp @@ -169,7 +169,7 @@ std::string decode_integer_var(encoded_variable_t encoded_var); * components of the message. * @tparam encoded_variable_t Type of the encoded variable * @tparam ConstantHandler Method to handle constants. Signature: - * (std::string_view constant, std::string& logtype) -> bool + * (std::string_view constant, std::string& logtype) -> void * @tparam EncodedVariableHandler Method to handle encoded variables. * Signature: (encoded_variable_t) -> void * @tparam DictionaryVariableHandler Method to handle dictionary variables. diff --git a/components/core/src/ffi/encoding_methods.inc b/components/core/src/ffi/encoding_methods.inc index d85b22312..f0d2194d4 100644 --- a/components/core/src/ffi/encoding_methods.inc +++ b/components/core/src/ffi/encoding_methods.inc @@ -366,9 +366,7 @@ bool encode_message_generically( logtype.reserve(message.length()); while (ir::get_bounds_of_next_var(message, var_begin_pos, var_end_pos)) { std::string_view constant{&message[constant_begin_pos], var_begin_pos - constant_begin_pos}; - if (false == constant_handler(constant, logtype)) { - return false; - } + constant_handler(constant, logtype); constant_begin_pos = var_end_pos; // Encode the variable @@ -392,9 +390,7 @@ bool encode_message_generically( std::string_view constant{ &message[constant_begin_pos], message.length() - constant_begin_pos}; - if (false == constant_handler(constant, logtype)) { - return false; - } + constant_handler(constant, logtype); } return true; @@ -407,20 +403,6 @@ bool encode_message( std::vector& encoded_vars, std::vector& dictionary_var_bounds ) { - auto constant_handler = [](std::string_view constant, std::string& logtype) { - // Ensure constant doesn't contain a variable placeholder - bool contains_variable_placeholder = std::any_of( - constant.cbegin(), - constant.cend(), - ir::is_variable_placeholder - ); - if (contains_variable_placeholder) { - return false; - } - - logtype.append(constant); - return true; - }; auto encoded_variable_handler = [&encoded_vars](encoded_variable_t encoded_variable) { encoded_vars.push_back(encoded_variable); }; @@ -439,7 +421,7 @@ bool encode_message( == encode_message_generically( message, logtype, - constant_handler, + ir::escape_and_append_const_to_logtype, encoded_variable_handler, dictionary_variable_handler )) diff --git a/components/core/src/ffi/ir_stream/Attribute.hpp b/components/core/src/ffi/ir_stream/Attribute.hpp new file mode 100644 index 000000000..8699caefd --- /dev/null +++ b/components/core/src/ffi/ir_stream/Attribute.hpp @@ -0,0 +1,22 @@ +#ifndef FFI_IR_STREAM_ATTRIBUTES_HPP +#define FFI_IR_STREAM_ATTRIBUTES_HPP + +#include +#include +#include + +namespace ffi::ir_stream { +class Attribute { +public: + [[nodiscard]] auto is_string() const -> bool { + return std::holds_alternative(m_attribute); + } + [[nodiscard]] auto is_int() const -> bool { + return std::holds_alternative(m_attribute); + } +private: + std::variant m_attribute; +}; +} + +#endif \ No newline at end of file diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index ba795f69d..6628619e3 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -51,17 +51,6 @@ static void add_base_metadata_fields( nlohmann::json& metadata ); -/** - * Appends a constant to the logtype, escaping any variable placeholders - * @param constant - * @param logtype - * @return true - */ -static bool append_constant_to_logtype( - string_view constant, - string& logtype -); - /** * A functor for encoding dictionary variables in a message */ @@ -165,14 +154,6 @@ static void add_base_metadata_fields( metadata[cProtocol::Metadata::TimeZoneIdKey] = time_zone_id; } -static bool append_constant_to_logtype(string_view constant, string& logtype) { - auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { - logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); - }; - ir::escape_and_append_constant_to_logtype(constant, logtype, escape_handler); - return true; -} - namespace eight_byte_encoding { bool encode_preamble( string_view timestamp_pattern, @@ -212,7 +193,7 @@ namespace eight_byte_encoding { == encode_message_generically( message, logtype, - append_constant_to_logtype, + ir::escape_and_append_const_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) @@ -286,7 +267,7 @@ namespace four_byte_encoding { == encode_message_generically( message, logtype, - append_constant_to_logtype, + ir::escape_and_append_const_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) diff --git a/components/core/src/ffi/search/Subquery.cpp b/components/core/src/ffi/search/Subquery.cpp index ac7e3d3f7..7bdd2a11c 100644 --- a/components/core/src/ffi/search/Subquery.cpp +++ b/components/core/src/ffi/search/Subquery.cpp @@ -10,11 +10,11 @@ using std::vector; namespace ffi::search { template Subquery::Subquery(string logtype_query, Subquery::QueryVariables variables) - : m_logtype_query(std::move(logtype_query)), - m_logtype_query_contains_wildcards(false), - m_query_vars(variables) { + : m_logtype_query{std::move(logtype_query)}, + m_logtype_query_contains_wildcards{false}, + m_query_vars{std::move(variables)} { // Determine if the query contains variables - bool is_escaped = false; + bool is_escaped{false}; auto const logtype_query_length{m_logtype_query.size()}; std::vector escaped_placeholder_positions; escaped_placeholder_positions.reserve(logtype_query_length / 2); @@ -43,10 +43,10 @@ Subquery::Subquery(string logtype_query, Subquery::QueryVari } std::string double_escaped_logtype_query; size_t curr_pos{0}; - for (auto const boundary : escaped_placeholder_positions) { - double_escaped_logtype_query.append(m_logtype_query, curr_pos, boundary - curr_pos); + for (auto const pos : escaped_placeholder_positions) { + double_escaped_logtype_query.append(m_logtype_query, curr_pos, pos - curr_pos); double_escaped_logtype_query += escape_char; - curr_pos = boundary; + curr_pos = pos; } if (logtype_query_length != curr_pos) { double_escaped_logtype_query diff --git a/components/core/src/ffi/search/query_methods.cpp b/components/core/src/ffi/search/query_methods.cpp index e343e78fc..a09f15782 100644 --- a/components/core/src/ffi/search/query_methods.cpp +++ b/components/core/src/ffi/search/query_methods.cpp @@ -93,8 +93,10 @@ void generate_subqueries( size_t constant_begin_pos = 0; for (auto const& token : tokens) { auto begin_pos = std::visit(TokenGetBeginPos, token); - logtype_query - .append(wildcard_query, constant_begin_pos, begin_pos - constant_begin_pos); + ir::escape_and_append_const_to_logtype( + wildcard_query.substr(constant_begin_pos, begin_pos - constant_begin_pos), + logtype_query + ); std::visit( overloaded{ @@ -114,7 +116,10 @@ void generate_subqueries( constant_begin_pos = std::visit(TokenGetEndPos, token); } - logtype_query.append(wildcard_query, constant_begin_pos); + ir::escape_and_append_const_to_logtype( + wildcard_query.substr(constant_begin_pos), + logtype_query + ); // Save sub-query if it's unique bool sub_query_exists = false; diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 6af750c62..5f3563e41 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -85,4 +85,11 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en return (msg_length != begin_pos); } + +void escape_and_append_const_to_logtype(std::string_view constant, std::string& logtype) { + auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + }; + append_constant_to_logtype(constant, logtype, escape_handler); +} } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index ba318a56c..2bb1049d7 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -80,6 +80,13 @@ bool is_var(std::string_view value); */ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end_pos); +/** + * Appends a constant to the logtype, escaping any variable placeholders. + * @param constant + * @param logtype +*/ +void escape_and_append_const_to_logtype(std::string_view constant, std::string& logtype); + /** * Appends the given constant to the logtype, escaping any variable placeholders * by using the escape handler. @@ -91,7 +98,7 @@ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end * @param escape_handler */ template -void escape_and_append_constant_to_logtype( +void append_constant_to_logtype( std::string_view constant, std::string& logtype, EscapeHandler escape_handler diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc index bbcece639..d6b015ced 100644 --- a/components/core/src/ir/parsing.inc +++ b/components/core/src/ir/parsing.inc @@ -8,7 +8,7 @@ namespace ir { template -void escape_and_append_constant_to_logtype( +void append_constant_to_logtype( std::string_view constant, std::string& logtype, EscapeHandler escape_handler diff --git a/components/core/tests/test-encoding_methods.cpp b/components/core/tests/test-encoding_methods.cpp index 815c5890f..1fa3c5408 100644 --- a/components/core/tests/test-encoding_methods.cpp +++ b/components/core/tests/test-encoding_methods.cpp @@ -418,11 +418,11 @@ TEMPLATE_TEST_CASE("Encoding messages", "[ffi][encode-message]", eight_byte_enco // Test encoding a message with a variable placeholder after the variables message = " test var123 "; message += enum_to_underlying_type(VariablePlaceholder::Integer); - REQUIRE(false == encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); + REQUIRE(encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); // Test encoding a message with a variable placeholder before a variable message += " var234"; - REQUIRE(false == encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); + REQUIRE(encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); } TEMPLATE_TEST_CASE("wildcard_query_matches_any_encoded_var", diff --git a/components/core/tests/test-query_methods.cpp b/components/core/tests/test-query_methods.cpp index ea172ef1c..477d71f8b 100644 --- a/components/core/tests/test-query_methods.cpp +++ b/components/core/tests/test-query_methods.cpp @@ -95,6 +95,14 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", message += " and a weird double " + var_strs[var_ix++]; message += " and a string with numbers " + var_strs[var_ix++]; message += " and another string with numbers " + var_strs[var_ix++]; + message += " and an escape "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + message += " and an int placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + message += " and a float placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Float); + message += " and a dictionary placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); REQUIRE(ffi::encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); wildcard_query = message; @@ -772,4 +780,124 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", } } } + + // In the following wildcard query, `^Q` represents a char with the value of + // VariablePlaceholder::Integer and `^R` represents a char with the value of + // VariablePlaceholder::Dictionary. + SECTION("*escape ^Q placeholders ^R in subqueries*") { + std::string const inner_static_text{ + std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Integer) + + " placeholders " + enum_to_underlying_type(VariablePlaceholder::Dictionary) + + " in "}; + std::string const double_escaped_static_text{ + std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Integer) + " placeholders " + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Dictionary) + " in "}; + std::string const prefix{"*escape"}; + std::string const postfix{"subqueries*"}; + + std::unordered_map logtype_query_to_expected_subquery; + ExpectedSubquery expected_subquery; + + // In the comments below, \d donates VariablePlaceholder::Dictionary + // Expected log type: "*escape \\^Q placeholders \\^R in subqueries*" + expected_subquery.logtype_query = prefix; + expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += postfix; + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*\d \\^Q placeholders \\^R in subqueries*" + expected_subquery.logtype_query = "*"; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += postfix; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*escape \\^Q placeholders \\^R in \d*" + expected_subquery.logtype_query = prefix; + expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += "*"; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*\d \\^Q placeholders \\^R in \d*" + expected_subquery.logtype_query = "*"; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += "*"; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + wildcard_query = prefix + inner_static_text + postfix; + generate_subqueries(wildcard_query, subqueries); + + REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); + auto const map_end{logtype_query_to_expected_subquery.cend()}; + for (auto const& subquery : subqueries) { + auto const& logtype_query{subquery.get_logtype_query()}; + auto const& query_vars{subquery.get_query_vars()}; + + auto const idx{logtype_query_to_expected_subquery.find(logtype_query)}; + REQUIRE(map_end != idx); + auto const& expected_subquery{idx->second}; + REQUIRE(subquery.logtype_query_contains_wildcards() + == expected_subquery.logtype_query_contains_wildcards); + auto const& expected_var_types{expected_subquery.query_var_types}; + REQUIRE(expected_var_types.size() == query_vars.size()); + for (size_t i{0}; i < expected_var_types.size(); ++i) { + auto const& expected_var_type{expected_var_types[i]}; + auto const& query_var{query_vars[i]}; + + if (expected_var_type.is_exact) { + REQUIRE(false); + } else { + REQUIRE(std::holds_alternative(query_var)); + auto const& wildcard_var{std::get(query_var)}; + switch (expected_var_type.interpretation) { + case VariablePlaceholder::Integer: + REQUIRE(TokenType::IntegerVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Float: + REQUIRE(TokenType::FloatVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Dictionary: + REQUIRE(TokenType::DictionaryVariable + == wildcard_var.get_current_interpretation()); + break; + default: + REQUIRE(false); + } + } + } + } + } } From ad482403b72b4e329e54b752031cf730abce201d Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Thu, 30 Nov 2023 22:53:50 -0500 Subject: [PATCH 29/30] Delete components/core/src/ffi/ir_stream/Attribute.hpp --- .../core/src/ffi/ir_stream/Attribute.hpp | 22 ------------------- 1 file changed, 22 deletions(-) delete mode 100644 components/core/src/ffi/ir_stream/Attribute.hpp diff --git a/components/core/src/ffi/ir_stream/Attribute.hpp b/components/core/src/ffi/ir_stream/Attribute.hpp deleted file mode 100644 index 8699caefd..000000000 --- a/components/core/src/ffi/ir_stream/Attribute.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef FFI_IR_STREAM_ATTRIBUTES_HPP -#define FFI_IR_STREAM_ATTRIBUTES_HPP - -#include -#include -#include - -namespace ffi::ir_stream { -class Attribute { -public: - [[nodiscard]] auto is_string() const -> bool { - return std::holds_alternative(m_attribute); - } - [[nodiscard]] auto is_int() const -> bool { - return std::holds_alternative(m_attribute); - } -private: - std::variant m_attribute; -}; -} - -#endif \ No newline at end of file From a37f90cc13b1f0a80c6c24fdb39cdb784194a0f6 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:04:15 -0500 Subject: [PATCH 30/30] Bug-fix: Apply correct escaping for wildcard queries; Some refactoring. --- components/core/src/Grep.cpp | 34 +++- .../core/src/LogTypeDictionaryEntry.cpp | 29 +-- .../core/src/LogTypeDictionaryEntry.hpp | 12 +- components/core/src/ffi/search/Subquery.cpp | 29 ++- .../core/src/ffi/search/query_methods.cpp | 17 +- components/core/src/ir/parsing.cpp | 13 +- components/core/src/ir/parsing.hpp | 19 +- components/core/src/ir/parsing.inc | 6 +- components/core/tests/test-query_methods.cpp | 183 ++++++++---------- 9 files changed, 181 insertions(+), 161 deletions(-) diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index e2ce5b7b4..e533a4eea 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -338,19 +338,29 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv { size_t last_token_end_pos = 0; string logtype; - auto double_escape_handler = [](std::string& logtype, char char_to_escape) -> void { + auto escape_handler + = [](std::string_view constant, size_t char_to_escape_pos, string& logtype) -> void { auto const escape_char{enum_to_underlying_type(ir::VariablePlaceholder::Escape)}; - logtype += escape_char; - if (escape_char != char_to_escape) { + auto const next_char_pos{char_to_escape_pos + 1}; + // NOTE: We don't want to add additional escapes for wildcards that have + // been escaped. E.g., the query "\\*" should remain unchanged. + if (next_char_pos < constant.length() + && false == is_wildcard(constant[next_char_pos])) + { + logtype += escape_char; + } else if (ir::is_variable_placeholder(constant[char_to_escape_pos])) { + logtype += escape_char; logtype += escape_char; } }; for (const auto& query_token : query_tokens) { // Append from end of last token to beginning of this token, to logtype ir::append_constant_to_logtype( - static_cast(processed_search_string).substr(last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos), - logtype, - double_escape_handler + static_cast(processed_search_string) + .substr(last_token_end_pos, + query_token.get_begin_pos() - last_token_end_pos), + escape_handler, + logtype ); last_token_end_pos = query_token.get_end_pos(); @@ -369,7 +379,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - ir::append_constant_to_logtype(query_token.get_value(), logtype, double_escape_handler); + ir::append_constant_to_logtype( + query_token.get_value(), escape_handler, + logtype + ); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -379,9 +392,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv if (last_token_end_pos < processed_search_string.length()) { // Append from end of last token to end ir::append_constant_to_logtype( - static_cast(processed_search_string).substr(last_token_end_pos, string::npos), - logtype, - double_escape_handler + static_cast(processed_search_string) + .substr(last_token_end_pos, string::npos), + escape_handler, + logtype ); last_token_end_pos = processed_search_string.length(); } diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index b73f47715..44c3a6c04 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -5,20 +5,21 @@ #include "type_utils.hpp" #include "Utils.hpp" +using std::string_view; using std::string; size_t LogTypeDictionaryEntry::get_placeholder_info( - size_t var_ix, - ir::VariablePlaceholder& var_placeholder + size_t placeholder_ix, + ir::VariablePlaceholder& placeholder ) const { - if (var_ix >= m_placeholder_positions.size()) { + if (placeholder_ix >= m_placeholder_positions.size()) { return SIZE_MAX; } - auto var_position = m_placeholder_positions[var_ix]; - var_placeholder = static_cast(m_value[var_position]); + auto var_position = m_placeholder_positions[placeholder_ix]; + placeholder = static_cast(m_value[var_position]); - return m_placeholder_positions[var_ix]; + return m_placeholder_positions[placeholder_ix]; } size_t LogTypeDictionaryEntry::get_data_size () const { @@ -54,29 +55,35 @@ void LogTypeDictionaryEntry::add_escape() { bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; - auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { + // clang-format off + auto escape_handler = [&]( + [[maybe_unused]] string_view constant, + [[maybe_unused]] size_t char_to_escape_pos, + string& logtype + ) -> void { m_placeholder_positions.push_back(logtype.size()); ++m_num_escaped_placeholders; logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); }; + // clang-format on if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - auto constant = static_cast(msg).substr( + auto constant = static_cast(msg).substr( last_var_end_pos, var_begin_pos - last_var_end_pos ); - ir::append_constant_to_logtype(constant, m_value, escape_handler); + ir::append_constant_to_logtype(constant, escape_handler, m_value); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - auto constant = static_cast(msg).substr( + auto constant = static_cast(msg).substr( last_var_end_pos, msg.length() - last_var_end_pos ); - ir::append_constant_to_logtype(constant, m_value, escape_handler); + ir::append_constant_to_logtype(constant, escape_handler, m_value); } return false; diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index b47f7009f..92f222440 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -72,7 +72,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry { } /** - * @return The number of variable placeholders (included escaped ones) in the logtype. + * @return The number of variable placeholders (including escaped ones) in the logtype. */ size_t get_num_placeholders () const { return m_placeholder_positions.size(); } @@ -84,12 +84,12 @@ class LogTypeDictionaryEntry : public DictionaryEntry { } /** - * Gets all info about a placeholder in the logtype - * @param var_ix The index of the variable to get the info for - * @param var_placeholder - * @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds + * Gets all info about a variable placeholder in the logtype + * @param placeholder_ix The index of the placeholder to get the info for + * @param placeholder + * @return The placeholder's position in the logtype, or SIZE_MAX if var_ix is out of bounds */ - size_t get_placeholder_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const; + size_t get_placeholder_info (size_t placeholder_ix, ir::VariablePlaceholder& placeholder) const; /** * Gets the size (in-memory) of the data contained in this entry diff --git a/components/core/src/ffi/search/Subquery.cpp b/components/core/src/ffi/search/Subquery.cpp index 7bdd2a11c..cb594bf08 100644 --- a/components/core/src/ffi/search/Subquery.cpp +++ b/components/core/src/ffi/search/Subquery.cpp @@ -13,7 +13,8 @@ Subquery::Subquery(string logtype_query, Subquery::QueryVari : m_logtype_query{std::move(logtype_query)}, m_logtype_query_contains_wildcards{false}, m_query_vars{std::move(variables)} { - // Determine if the query contains variables + // Determine if the query contains wildcards and record the positions of the + // variable placeholders. bool is_escaped{false}; auto const logtype_query_length{m_logtype_query.size()}; std::vector escaped_placeholder_positions; @@ -23,10 +24,9 @@ Subquery::Subquery(string logtype_query, Subquery::QueryVari char const c{m_logtype_query[idx]}; if (is_escaped) { is_escaped = false; - if (escape_char == c) { - continue; + if (ir::is_variable_placeholder(c)) { + escaped_placeholder_positions.push_back(idx); } - escaped_placeholder_positions.push_back(idx); } else if (escape_char == c) { is_escaped = true; } else if ((enum_to_underlying_type(WildcardType::ZeroOrMoreChars) == c @@ -35,22 +35,21 @@ Subquery::Subquery(string logtype_query, Subquery::QueryVari m_logtype_query_contains_wildcards = true; } } - if (false == m_logtype_query_contains_wildcards) { - return; - } - if (escaped_placeholder_positions.empty()) { + if (false == m_logtype_query_contains_wildcards || escaped_placeholder_positions.empty()) { return; } + + // Query contains wildcards and variable placeholders, so we need to add an + // additional escape for each variable placeholder. std::string double_escaped_logtype_query; - size_t curr_pos{0}; - for (auto const pos : escaped_placeholder_positions) { - double_escaped_logtype_query.append(m_logtype_query, curr_pos, pos - curr_pos); + size_t pos{0}; + for (auto const placeholder_pos : escaped_placeholder_positions) { + double_escaped_logtype_query.append(m_logtype_query, pos, placeholder_pos - pos); double_escaped_logtype_query += escape_char; - curr_pos = pos; + pos = placeholder_pos; } - if (logtype_query_length != curr_pos) { - double_escaped_logtype_query - .append(m_logtype_query, curr_pos, logtype_query_length - curr_pos); + if (logtype_query_length != pos) { + double_escaped_logtype_query.append(m_logtype_query, pos); } m_logtype_query = std::move(double_escaped_logtype_query); } diff --git a/components/core/src/ffi/search/query_methods.cpp b/components/core/src/ffi/search/query_methods.cpp index a09f15782..932792e8a 100644 --- a/components/core/src/ffi/search/query_methods.cpp +++ b/components/core/src/ffi/search/query_methods.cpp @@ -84,6 +84,17 @@ void generate_subqueries( tokenize_query(wildcard_query, tokens, composite_wildcard_token_indexes); bool all_interpretations_complete = false; + auto escape_handler + = [](string_view constant, size_t char_to_escape_pos, string& logtype) -> void { + auto const next_char_pos{char_to_escape_pos + 1}; + // NOTE: We don't want to add additional escapes for wildcards that have + // been escaped. E.g., the query "\\*" should remain unchanged. + if (ir::is_variable_placeholder(constant[char_to_escape_pos]) + || (next_char_pos < constant.length() && false == is_wildcard(constant[next_char_pos]))) + { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + } + }; string logtype_query; vector, WildcardToken>> query_vars; @@ -93,8 +104,9 @@ void generate_subqueries( size_t constant_begin_pos = 0; for (auto const& token : tokens) { auto begin_pos = std::visit(TokenGetBeginPos, token); - ir::escape_and_append_const_to_logtype( + ir::append_constant_to_logtype( wildcard_query.substr(constant_begin_pos, begin_pos - constant_begin_pos), + escape_handler, logtype_query ); @@ -116,8 +128,9 @@ void generate_subqueries( constant_begin_pos = std::visit(TokenGetEndPos, token); } - ir::escape_and_append_const_to_logtype( + ir::append_constant_to_logtype( wildcard_query.substr(constant_begin_pos), + escape_handler, logtype_query ); diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 5f3563e41..619bc5e5b 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -4,6 +4,7 @@ #include "../type_utils.hpp" using std::string_view; +using std::string; namespace ir { /* @@ -86,10 +87,16 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en return (msg_length != begin_pos); } -void escape_and_append_const_to_logtype(std::string_view constant, std::string& logtype) { - auto escape_handler = [&](std::string& logtype, [[maybe_unused]] char char_to_escape) -> void { +void escape_and_append_const_to_logtype(string_view constant, string& logtype) { + // clang-format off + auto escape_handler = [&]( + [[maybe_unused]] string_view constant, + [[maybe_unused]] size_t char_to_escape_pos, + string& logtype + ) -> void { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); }; - append_constant_to_logtype(constant, logtype, escape_handler); + // clang-format on + append_constant_to_logtype(constant, escape_handler, logtype); } } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index 2bb1049d7..64229d590 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -88,20 +88,23 @@ bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end void escape_and_append_const_to_logtype(std::string_view constant, std::string& logtype); /** - * Appends the given constant to the logtype, escaping any variable placeholders - * by using the escape handler. - * @tparam EscapeHandler Method to append and track escape chars when escaping - * variable placeholders. Signature: - * (std::string& logtype, [[maybe_unused]] char char_to_escape) -> void + * Appends the given constant to the logtype, optionally escaping any variable + * placeholders found within the constant using the given handler. + * @tparam EscapeHandler Method to optionally escape any variable placeholders + * found within the constant. Signature: ( + * [[maybe_unused]] std::string_view constant, + * [[maybe_unused]] size_t char_to_escape_pos, + * std::string& logtype + * ) -> void * @param constant - * @param logtype * @param escape_handler + * @param logtype */ template void append_constant_to_logtype( std::string_view constant, - std::string& logtype, - EscapeHandler escape_handler + EscapeHandler escape_handler, + std::string& logtype ); } // namespace ir diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc index d6b015ced..63e4a5250 100644 --- a/components/core/src/ir/parsing.inc +++ b/components/core/src/ir/parsing.inc @@ -10,8 +10,8 @@ namespace ir { template void append_constant_to_logtype( std::string_view constant, - std::string& logtype, - EscapeHandler escape_handler + EscapeHandler escape_handler, + std::string& logtype ) { size_t begin_pos = 0; auto constant_len = constant.length(); @@ -25,7 +25,7 @@ void append_constant_to_logtype( // NOTE: We don't need to append the character of interest // immediately since the next constant copy operation will get it begin_pos = i; - escape_handler(logtype, c); + escape_handler(constant, i, logtype); } logtype.append(constant, begin_pos, constant_len - begin_pos); } diff --git a/components/core/tests/test-query_methods.cpp b/components/core/tests/test-query_methods.cpp index 477d71f8b..e43db31d8 100644 --- a/components/core/tests/test-query_methods.cpp +++ b/components/core/tests/test-query_methods.cpp @@ -54,11 +54,72 @@ struct ExpectedSubquery { vector query_var_types; }; +namespace { +/** + * Generates subqueries from a given wildcard query and validates that they + * match the expected subqueries. + * @tparam encoded_var_t + * @param wildcard_query + * @param logtype_query_to_expected_subquery A map from expected logtype queries + * to expected subqueries. + */ +template +void test_generating_subqueries( + std::string const& wildcard_query, + std::unordered_map const& logtype_query_to_expected_subquery +) { + vector> subqueries; + generate_subqueries(wildcard_query, subqueries); + REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); + auto const map_end = logtype_query_to_expected_subquery.cend(); + for (auto const& subquery : subqueries) { + auto const& logtype_query = subquery.get_logtype_query(); + auto const& query_vars = subquery.get_query_vars(); + + auto idx = logtype_query_to_expected_subquery.find(logtype_query); + REQUIRE(map_end != idx); + auto const& expected_subquery = idx->second; + REQUIRE(subquery.logtype_query_contains_wildcards() + == expected_subquery.logtype_query_contains_wildcards); + auto const& expected_var_types = expected_subquery.query_var_types; + REQUIRE(expected_var_types.size() == query_vars.size()); + for (size_t i = 0; i < expected_var_types.size(); ++i) { + auto const& expected_var_type = expected_var_types[i]; + auto const& query_var = query_vars[i]; + + if (expected_var_type.is_exact) { + REQUIRE(std::holds_alternative>(query_var)); + auto const& exact_var = std::get>(query_var); + REQUIRE(expected_var_type.interpretation == exact_var.get_placeholder()); + } else { + REQUIRE(std::holds_alternative>(query_var)); + auto const& wildcard_var = std::get>(query_var); + switch (expected_var_type.interpretation) { + case VariablePlaceholder::Integer: + REQUIRE(TokenType::IntegerVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Float: + REQUIRE(TokenType::FloatVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Dictionary: + REQUIRE(TokenType::DictionaryVariable + == wildcard_var.get_current_interpretation()); + break; + default: + REQUIRE(false); + } + } + } + } +} +} // namespace + TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", eight_byte_encoded_variable_t, four_byte_encoded_variable_t) { using TestTypeExactVariableToken = ExactVariableToken; - using TestTypeWildcardVariableToken = WildcardToken; string wildcard_query; vector> subqueries; @@ -734,78 +795,36 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", expected_subquery); expected_subquery.clear(); - wildcard_query = "*abc*123?456?"; - generate_subqueries(wildcard_query, subqueries); - REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); - const auto map_end = logtype_query_to_expected_subquery.cend(); - for (const auto& subquery : subqueries) { - const auto& logtype_query = subquery.get_logtype_query(); - const auto& query_vars = subquery.get_query_vars(); - - auto idx = logtype_query_to_expected_subquery.find(logtype_query); - REQUIRE(map_end != idx); - const auto& expected_subquery = idx->second; - REQUIRE(subquery.logtype_query_contains_wildcards() - == expected_subquery.logtype_query_contains_wildcards); - const auto& expected_var_types = expected_subquery.query_var_types; - REQUIRE(expected_var_types.size() == query_vars.size()); - for (size_t i = 0; i < expected_var_types.size(); ++i) { - const auto& expected_var_type = expected_var_types[i]; - const auto& query_var = query_vars[i]; - - if (expected_var_type.is_exact) { - REQUIRE(std::holds_alternative(query_var)); - const auto& exact_var = std::get(query_var); - REQUIRE(expected_var_type.interpretation == exact_var.get_placeholder()); - } else { - REQUIRE(std::holds_alternative(query_var)); - const auto& wildcard_var = std::get(query_var); - switch (expected_var_type.interpretation) { - case VariablePlaceholder::Integer: - REQUIRE(TokenType::IntegerVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Float: - REQUIRE(TokenType::FloatVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Dictionary: - REQUIRE(TokenType::DictionaryVariable - == wildcard_var.get_current_interpretation()); - break; - default: - REQUIRE(false); - } - } - } - } + test_generating_subqueries("*abc*123?456?", logtype_query_to_expected_subquery); } // In the following wildcard query, `^Q` represents a char with the value of // VariablePlaceholder::Integer and `^R` represents a char with the value of // VariablePlaceholder::Dictionary. - SECTION("*escape ^Q placeholders ^R in subqueries*") { + SECTION("*escape ^Q placeholders ^R in \\? \\* subqueries*") { + std::string const prefix{"*escape"}; + std::string const inner_static_text{ std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Integer) + " placeholders " + enum_to_underlying_type(VariablePlaceholder::Dictionary) - + " in "}; - std::string const double_escaped_static_text{ + + " in \\? \\* "}; + std::string const escaped_inner_static_text{ std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Escape) + enum_to_underlying_type(VariablePlaceholder::Escape) + enum_to_underlying_type(VariablePlaceholder::Integer) + " placeholders " + enum_to_underlying_type(VariablePlaceholder::Escape) + enum_to_underlying_type(VariablePlaceholder::Escape) - + enum_to_underlying_type(VariablePlaceholder::Dictionary) + " in "}; - std::string const prefix{"*escape"}; + + enum_to_underlying_type(VariablePlaceholder::Dictionary) + " in \\? \\* "}; + std::string const postfix{"subqueries*"}; std::unordered_map logtype_query_to_expected_subquery; ExpectedSubquery expected_subquery; - // In the comments below, \d donates VariablePlaceholder::Dictionary - // Expected log type: "*escape \\^Q placeholders \\^R in subqueries*" + // In the comments below, \d denotes VariablePlaceholder::Dictionary + // Expected log type: "*escape \\^Q placeholders \\^R in \\? \\* subqueries*" expected_subquery.logtype_query = prefix; - expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += escaped_inner_static_text; expected_subquery.logtype_query += postfix; expected_subquery.logtype_query_contains_wildcards = true; logtype_query_to_expected_subquery.emplace( @@ -814,10 +833,10 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", ); expected_subquery.clear(); - // Expected log type: "*\d \\^Q placeholders \\^R in subqueries*" + // Expected log type: "*\d \\^Q placeholders \\^R in \\? \\* subqueries*" expected_subquery.logtype_query = "*"; expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); - expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += escaped_inner_static_text; expected_subquery.logtype_query += postfix; expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); expected_subquery.logtype_query_contains_wildcards = true; @@ -827,9 +846,9 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", ); expected_subquery.clear(); - // Expected log type: "*escape \\^Q placeholders \\^R in \d*" + // Expected log type: "*escape \\^Q placeholders \\^R in \\? \\* \d*" expected_subquery.logtype_query = prefix; - expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += escaped_inner_static_text; expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); expected_subquery.logtype_query += "*"; expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); @@ -840,10 +859,10 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", ); expected_subquery.clear(); - // Expected log type: "*\d \\^Q placeholders \\^R in \d*" + // Expected log type: "*\d \\^Q placeholders \\^R in \\? \\* \d*" expected_subquery.logtype_query = "*"; expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); - expected_subquery.logtype_query += double_escaped_static_text; + expected_subquery.logtype_query += escaped_inner_static_text; expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); expected_subquery.logtype_query += "*"; expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); @@ -856,48 +875,6 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", expected_subquery.clear(); wildcard_query = prefix + inner_static_text + postfix; - generate_subqueries(wildcard_query, subqueries); - - REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); - auto const map_end{logtype_query_to_expected_subquery.cend()}; - for (auto const& subquery : subqueries) { - auto const& logtype_query{subquery.get_logtype_query()}; - auto const& query_vars{subquery.get_query_vars()}; - - auto const idx{logtype_query_to_expected_subquery.find(logtype_query)}; - REQUIRE(map_end != idx); - auto const& expected_subquery{idx->second}; - REQUIRE(subquery.logtype_query_contains_wildcards() - == expected_subquery.logtype_query_contains_wildcards); - auto const& expected_var_types{expected_subquery.query_var_types}; - REQUIRE(expected_var_types.size() == query_vars.size()); - for (size_t i{0}; i < expected_var_types.size(); ++i) { - auto const& expected_var_type{expected_var_types[i]}; - auto const& query_var{query_vars[i]}; - - if (expected_var_type.is_exact) { - REQUIRE(false); - } else { - REQUIRE(std::holds_alternative(query_var)); - auto const& wildcard_var{std::get(query_var)}; - switch (expected_var_type.interpretation) { - case VariablePlaceholder::Integer: - REQUIRE(TokenType::IntegerVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Float: - REQUIRE(TokenType::FloatVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Dictionary: - REQUIRE(TokenType::DictionaryVariable - == wildcard_var.get_current_interpretation()); - break; - default: - REQUIRE(false); - } - } - } - } + test_generating_subqueries(wildcard_query, logtype_query_to_expected_subquery); } }