diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 1cc7a43c3..503d8f122 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -7,6 +7,8 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE) endif() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # Set general compressor set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor") set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd) @@ -253,6 +255,7 @@ set(SOURCE_FILES_clp src/ir/LogEventDeserializer.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp @@ -436,6 +439,7 @@ set(SOURCE_FILES_clg src/ir/LogEvent.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp @@ -594,6 +598,7 @@ set(SOURCE_FILES_clo src/ir/LogEvent.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/LogTypeDictionaryEntry.cpp src/LogTypeDictionaryEntry.hpp src/LogTypeDictionaryReader.cpp @@ -797,6 +802,7 @@ set(SOURCE_FILES_unitTest src/ir/LogEventDeserializer.hpp src/ir/parsing.cpp src/ir/parsing.hpp + src/ir/parsing.inc src/ir/utils.cpp src/ir/utils.hpp src/LibarchiveFileReader.cpp diff --git a/components/core/src/EncodedVariableInterpreter.cpp b/components/core/src/EncodedVariableInterpreter.cpp index df241b3a4..e8b681708 100644 --- a/components/core/src/EncodedVariableInterpreter.cpp +++ b/components/core/src/EncodedVariableInterpreter.cpp @@ -273,7 +273,7 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( encoded_vars.push_back(encoded_var); }; - ffi::ir_stream::generic_decode_message( + ffi::ir_stream::generic_decode_message( log_event.get_logtype(), log_event.get_encoded_vars(), log_event.get_dict_vars(), @@ -287,13 +287,12 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDictionaryEntry& logtype_dict_entry, const VariableDictionaryReader& var_dict, const vector& encoded_vars, string& decompressed_msg) { - size_t num_vars_in_logtype = logtype_dict_entry.get_num_vars(); - // Ensure the number of variables in the logtype matches the number of encoded variables given const auto& logtype_value = logtype_dict_entry.get_value(); - if (num_vars_in_logtype != encoded_vars.size()) { + size_t const num_vars = logtype_dict_entry.get_num_variables(); + if (num_vars != encoded_vars.size()) { SPDLOG_ERROR("EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given for decoding.", logtype_value.c_str(), - num_vars_in_logtype, encoded_vars.size()); + num_vars, encoded_vars.size()); return false; } @@ -301,24 +300,27 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic size_t constant_begin_pos = 0; string float_str; variable_dictionary_id_t var_dict_id; - for (size_t i = 0; i < num_vars_in_logtype; ++i) { - size_t var_position = logtype_dict_entry.get_var_info(i, var_placeholder); + size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders(); + for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; ++placeholder_ix) { + size_t placeholder_position = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); - // Add the constant that's between the last variable and this one + // Add the constant that's between the last placeholder and this one decompressed_msg.append(logtype_value, constant_begin_pos, - var_position - constant_begin_pos); + placeholder_position - constant_begin_pos); switch (var_placeholder) { case ir::VariablePlaceholder::Integer: - decompressed_msg += std::to_string(encoded_vars[i]); + decompressed_msg += std::to_string(encoded_vars[var_ix++]); break; case ir::VariablePlaceholder::Float: - convert_encoded_float_to_string(encoded_vars[i], float_str); + convert_encoded_float_to_string(encoded_vars[var_ix++], float_str); decompressed_msg += float_str; break; case ir::VariablePlaceholder::Dictionary: - var_dict_id = decode_var_dict_id(encoded_vars[i]); + var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]); decompressed_msg += var_dict.get_value(var_dict_id); break; + case ir::VariablePlaceholder::Escape: + break; default: SPDLOG_ERROR( "EncodedVariableInterpreter: Logtype '{}' contains " @@ -328,7 +330,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic return false; } // Move past the variable placeholder - constant_begin_pos = var_position + 1; + constant_begin_pos = placeholder_position + 1; } // Append remainder of logtype, if any if (constant_begin_pos < logtype_value.length()) { diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp index 2ab4f7875..e533a4eea 100644 --- a/components/core/src/Grep.cpp +++ b/components/core/src/Grep.cpp @@ -338,9 +338,30 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv { size_t last_token_end_pos = 0; string logtype; + auto escape_handler + = [](std::string_view constant, size_t char_to_escape_pos, string& logtype) -> void { + auto const escape_char{enum_to_underlying_type(ir::VariablePlaceholder::Escape)}; + auto const next_char_pos{char_to_escape_pos + 1}; + // NOTE: We don't want to add additional escapes for wildcards that have + // been escaped. E.g., the query "\\*" should remain unchanged. + if (next_char_pos < constant.length() + && false == is_wildcard(constant[next_char_pos])) + { + logtype += escape_char; + } else if (ir::is_variable_placeholder(constant[char_to_escape_pos])) { + logtype += escape_char; + logtype += escape_char; + } + }; for (const auto& query_token : query_tokens) { // Append from end of last token to beginning of this token, to logtype - logtype.append(processed_search_string, last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos); + ir::append_constant_to_logtype( + static_cast(processed_search_string) + .substr(last_token_end_pos, + query_token.get_begin_pos() - last_token_end_pos), + escape_handler, + logtype + ); last_token_end_pos = query_token.get_end_pos(); if (query_token.is_wildcard()) { @@ -358,7 +379,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv } } else { if (!query_token.is_var()) { - logtype += query_token.get_value(); + ir::append_constant_to_logtype( + query_token.get_value(), escape_handler, + logtype + ); } else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) { return SubQueryMatchabilityResult::WontMatch; } @@ -367,7 +391,12 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv if (last_token_end_pos < processed_search_string.length()) { // Append from end of last token to end - logtype.append(processed_search_string, last_token_end_pos, string::npos); + ir::append_constant_to_logtype( + static_cast(processed_search_string) + .substr(last_token_end_pos, string::npos), + escape_handler, + logtype + ); last_token_end_pos = processed_search_string.length(); } diff --git a/components/core/src/LogTypeDictionaryEntry.cpp b/components/core/src/LogTypeDictionaryEntry.cpp index 1d6ec4a7c..44c3a6c04 100644 --- a/components/core/src/LogTypeDictionaryEntry.cpp +++ b/components/core/src/LogTypeDictionaryEntry.cpp @@ -5,25 +5,26 @@ #include "type_utils.hpp" #include "Utils.hpp" +using std::string_view; using std::string; -size_t LogTypeDictionaryEntry::get_var_info( - size_t var_ix, - ir::VariablePlaceholder& var_placeholder +size_t LogTypeDictionaryEntry::get_placeholder_info( + size_t placeholder_ix, + ir::VariablePlaceholder& placeholder ) const { - if (var_ix >= m_var_positions.size()) { + if (placeholder_ix >= m_placeholder_positions.size()) { return SIZE_MAX; } - auto var_position = m_var_positions[var_ix]; - var_placeholder = static_cast(m_value[var_position]); + auto var_position = m_placeholder_positions[placeholder_ix]; + placeholder = static_cast(m_value[var_position]); - return m_var_positions[var_ix]; + return m_placeholder_positions[placeholder_ix]; } size_t LogTypeDictionaryEntry::get_data_size () const { // NOTE: sizeof(vector[0]) is executed at compile time so there's no risk of an exception at runtime - return sizeof(m_id) + m_value.length() + m_var_positions.size() * sizeof(m_var_positions[0]) + + return sizeof(m_id) + m_value.length() + m_placeholder_positions.size() * sizeof(m_placeholder_positions[0]) + m_ids_of_segments_containing_entry.size() * sizeof(segment_id_t); } @@ -32,32 +33,57 @@ void LogTypeDictionaryEntry::add_constant (const string& value_containing_consta } void LogTypeDictionaryEntry::add_dictionary_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_dict_var(m_value); } void LogTypeDictionaryEntry::add_int_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_int_var(m_value); } void LogTypeDictionaryEntry::add_float_var () { - m_var_positions.push_back(m_value.length()); + m_placeholder_positions.push_back(m_value.length()); add_float_var(m_value); } +void LogTypeDictionaryEntry::add_escape() { + m_placeholder_positions.push_back(m_value.length()); + add_escape(m_value); + ++m_num_escaped_placeholders; +} + bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) { auto last_var_end_pos = var_end_pos; + // clang-format off + auto escape_handler = [&]( + [[maybe_unused]] string_view constant, + [[maybe_unused]] size_t char_to_escape_pos, + string& logtype + ) -> void { + m_placeholder_positions.push_back(logtype.size()); + ++m_num_escaped_placeholders; + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + }; + // clang-format on if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) { // Append to log type: from end of last variable to start of current variable - add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos); + auto constant = static_cast(msg).substr( + last_var_end_pos, + var_begin_pos - last_var_end_pos + ); + ir::append_constant_to_logtype(constant, escape_handler, m_value); var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos); return true; } if (last_var_end_pos < msg.length()) { // Append to log type: from end of last variable to end - add_constant(msg, last_var_end_pos, msg.length() - last_var_end_pos); + auto constant = static_cast(msg).substr( + last_var_end_pos, + msg.length() - last_var_end_pos + ); + ir::append_constant_to_logtype(constant, escape_handler, m_value); } return false; @@ -65,16 +91,15 @@ bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begi void LogTypeDictionaryEntry::clear () { m_value.clear(); - m_var_positions.clear(); + m_placeholder_positions.clear(); + m_num_escaped_placeholders = 0; } void LogTypeDictionaryEntry::write_to_file (streaming_compression::Compressor& compressor) const { compressor.write_numeric_value(m_id); - string escaped_value; - get_value_with_unfounded_variables_escaped(escaped_value); - compressor.write_numeric_value(escaped_value.length()); - compressor.write_string(escaped_value); + compressor.write_numeric_value(m_value.length()); + compressor.write_string(m_value); } ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Decompressor& decompressor) { @@ -107,8 +132,11 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec if (is_escaped) { constant += c; is_escaped = false; - } else if (ir::cVariablePlaceholderEscapeCharacter == c) { + } else if (enum_to_underlying_type(ir::VariablePlaceholder::Escape) == c) { is_escaped = true; + add_constant(constant, 0, constant.length()); + constant.clear(); + add_escape(); } else { if (enum_to_underlying_type(ir::VariablePlaceholder::Integer) == c) { add_constant(constant, 0, constant.length()); @@ -141,27 +169,3 @@ void LogTypeDictionaryEntry::read_from_file (streaming_compression::Decompressor throw OperationFailed(error_code, __FILENAME__, __LINE__); } } - -void LogTypeDictionaryEntry::get_value_with_unfounded_variables_escaped (string& escaped_logtype_value) const { - auto value_view = static_cast(m_value); - size_t begin_ix = 0; - // Reset escaped value and reserve enough space to at least contain the whole value - escaped_logtype_value.clear(); - escaped_logtype_value.reserve(value_view.length()); - for (auto var_position : m_var_positions) { - size_t end_ix = var_position; - - ir::escape_and_append_constant_to_logtype( - value_view.substr(begin_ix, end_ix - begin_ix), - escaped_logtype_value - ); - - // Add variable placeholder - escaped_logtype_value += value_view[end_ix]; - - // Move begin to start of next portion of logtype between variables - begin_ix = end_ix + 1; - } - // Escape any variable placeholders in remainder of value - ir::escape_and_append_constant_to_logtype(value_view.substr(begin_ix), escaped_logtype_value); -} diff --git a/components/core/src/LogTypeDictionaryEntry.hpp b/components/core/src/LogTypeDictionaryEntry.hpp index aade2fbad..92f222440 100644 --- a/components/core/src/LogTypeDictionaryEntry.hpp +++ b/components/core/src/LogTypeDictionaryEntry.hpp @@ -63,15 +63,33 @@ class LogTypeDictionaryEntry : public DictionaryEntry { static void add_float_var (std::string& logtype) { logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); } + /** + * Adds an escape character to the given logtype + * @param logtype + */ + static void add_escape (std::string& logtype) { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + } - size_t get_num_vars () const { return m_var_positions.size(); } /** - * Gets all info about a variable in the logtype - * @param var_ix The index of the variable to get the info for - * @param var_placeholder - * @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds + * @return The number of variable placeholders (including escaped ones) in the logtype. + */ + size_t get_num_placeholders () const { return m_placeholder_positions.size(); } + + /** + * @return The number of variable placeholders (excluding escaped ones) in the logtype. + */ + size_t get_num_variables () const { + return m_placeholder_positions.size() - m_num_escaped_placeholders; + } + + /** + * Gets all info about a variable placeholder in the logtype + * @param placeholder_ix The index of the placeholder to get the info for + * @param placeholder + * @return The placeholder's position in the logtype, or SIZE_MAX if var_ix is out of bounds */ - size_t get_var_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const; + size_t get_placeholder_info (size_t placeholder_ix, ir::VariablePlaceholder& placeholder) const; /** * Gets the size (in-memory) of the data contained in this entry @@ -98,6 +116,10 @@ class LogTypeDictionaryEntry : public DictionaryEntry { * Adds a dictionary variable placeholder */ void add_dictionary_var (); + /** + * Adds an escape character + */ + void add_escape (); /** * Parses next variable from a message, constructing the constant part of the message's logtype as well @@ -137,16 +159,9 @@ class LogTypeDictionaryEntry : public DictionaryEntry { void read_from_file (streaming_compression::Decompressor& decompressor); private: - // Methods - /** - * Escapes any variable placeholders that don't correspond to the positions - * of variables in the logtype entry's value - * @param escaped_logtype_value - */ - void get_value_with_unfounded_variables_escaped (std::string& escaped_logtype_value) const; - // Variables - std::vector m_var_positions; + std::vector m_placeholder_positions; + size_t m_num_escaped_placeholders{0}; }; #endif // LOGTYPEDICTIONARYENTRY_HPP diff --git a/components/core/src/ffi/encoding_methods.hpp b/components/core/src/ffi/encoding_methods.hpp index 77de54b0d..121cc0304 100644 --- a/components/core/src/ffi/encoding_methods.hpp +++ b/components/core/src/ffi/encoding_methods.hpp @@ -171,7 +171,7 @@ std::string decode_integer_var(encoded_variable_t encoded_var); * components of the message. * @tparam encoded_variable_t Type of the encoded variable * @tparam ConstantHandler Method to handle constants. Signature: - * (std::string_view constant, std::string& logtype) -> bool + * (std::string_view constant, std::string& logtype) -> void * @tparam EncodedVariableHandler Method to handle encoded variables. * Signature: (encoded_variable_t) -> void * @tparam DictionaryVariableHandler Method to handle dictionary variables. diff --git a/components/core/src/ffi/encoding_methods.inc b/components/core/src/ffi/encoding_methods.inc index b93bab275..f5c675d56 100644 --- a/components/core/src/ffi/encoding_methods.inc +++ b/components/core/src/ffi/encoding_methods.inc @@ -368,9 +368,7 @@ bool encode_message_generically( logtype.reserve(message.length()); while (ir::get_bounds_of_next_var(message, var_begin_pos, var_end_pos)) { std::string_view constant{&message[constant_begin_pos], var_begin_pos - constant_begin_pos}; - if (false == constant_handler(constant, logtype)) { - return false; - } + constant_handler(constant, logtype); constant_begin_pos = var_end_pos; // Encode the variable @@ -394,9 +392,7 @@ bool encode_message_generically( std::string_view constant{ &message[constant_begin_pos], message.length() - constant_begin_pos}; - if (false == constant_handler(constant, logtype)) { - return false; - } + constant_handler(constant, logtype); } return true; @@ -409,20 +405,6 @@ bool encode_message( std::vector& encoded_vars, std::vector& dictionary_var_bounds ) { - auto constant_handler = [](std::string_view constant, std::string& logtype) { - // Ensure constant doesn't contain a variable placeholder - bool contains_variable_placeholder = std::any_of( - constant.cbegin(), - constant.cend(), - ir::is_variable_placeholder - ); - if (contains_variable_placeholder) { - return false; - } - - logtype.append(constant); - return true; - }; auto encoded_variable_handler = [&encoded_vars](encoded_variable_t encoded_variable) { encoded_vars.push_back(encoded_variable); }; @@ -441,7 +423,7 @@ bool encode_message( == encode_message_generically( message, logtype, - constant_handler, + ir::escape_and_append_const_to_logtype, encoded_variable_handler, dictionary_variable_handler )) diff --git a/components/core/src/ffi/ir_stream/decoding_methods.cpp b/components/core/src/ffi/ir_stream/decoding_methods.cpp index 3ceb05ecc..88fef3509 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/decoding_methods.cpp @@ -295,7 +295,7 @@ generic_decode_next_message(ReaderInterface& reader, string& message, epoch_time auto dict_var_handler = [&](string const& dict_var) { message.append(dict_var); }; try { - generic_decode_message( + generic_decode_message( logtype, encoded_vars, dict_vars, diff --git a/components/core/src/ffi/ir_stream/decoding_methods.hpp b/components/core/src/ffi/ir_stream/decoding_methods.hpp index 000dee70f..f14505e10 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.hpp +++ b/components/core/src/ffi/ir_stream/decoding_methods.hpp @@ -81,6 +81,8 @@ auto deserialize_ir_message( /** * Decodes the IR message calls the given methods to handle each component of * the message + * @tparam unescape_logtype Whether to remove the escape characters from the + * logtype before calling \p ConstantHandler * @tparam encoded_variable_t Type of the encoded variable * @tparam ConstantHandler Method to handle constants in the logtype. * Signature: (const std::string&, size_t, size_t) -> void @@ -100,6 +102,7 @@ auto deserialize_ir_message( * @throw DecodingException if the message can not be decoded properly */ template < + bool unescape_logtype, typename encoded_variable_t, typename ConstantHandler, typename EncodedIntHandler, diff --git a/components/core/src/ffi/ir_stream/decoding_methods.inc b/components/core/src/ffi/ir_stream/decoding_methods.inc index 139688388..cad66f901 100644 --- a/components/core/src/ffi/ir_stream/decoding_methods.inc +++ b/components/core/src/ffi/ir_stream/decoding_methods.inc @@ -11,6 +11,7 @@ namespace ffi::ir_stream { template < + bool unescape_logtype, typename encoded_variable_t, typename ConstantHandler, typename EncodedIntHandler, @@ -98,7 +99,7 @@ void generic_decode_message( break; } - case ir::cVariablePlaceholderEscapeCharacter: { + case enum_to_underlying_type(ir::VariablePlaceholder::Escape): { // Ensure the escape character is followed by a // character that's being escaped if (cur_pos == logtype_length - 1) { @@ -109,14 +110,17 @@ void generic_decode_message( cUnexpectedEscapeCharacterMessage ); } - constant_handler( - logtype, - next_static_text_begin_pos, - cur_pos - next_static_text_begin_pos - ); - // Skip the escape character - next_static_text_begin_pos = cur_pos + 1; + if constexpr (unescape_logtype) { + constant_handler( + logtype, + next_static_text_begin_pos, + cur_pos - next_static_text_begin_pos + ); + + // Skip the escape character + next_static_text_begin_pos = cur_pos + 1; + } // The character after the escape character is static text // (regardless of whether it is a variable placeholder), so // increment cur_pos by 1 to ensure we don't process the diff --git a/components/core/src/ffi/ir_stream/encoding_methods.cpp b/components/core/src/ffi/ir_stream/encoding_methods.cpp index 35c766aef..4ef749405 100644 --- a/components/core/src/ffi/ir_stream/encoding_methods.cpp +++ b/components/core/src/ffi/ir_stream/encoding_methods.cpp @@ -51,14 +51,6 @@ static void add_base_metadata_fields( nlohmann::json& metadata ); -/** - * Appends a constant to the logtype, escaping any variable placeholders - * @param constant - * @param logtype - * @return true - */ -static bool append_constant_to_logtype(string_view constant, string& logtype); - /** * A functor for encoding dictionary variables in a message */ @@ -162,11 +154,6 @@ static void add_base_metadata_fields( metadata[cProtocol::Metadata::TimeZoneIdKey] = time_zone_id; } -static bool append_constant_to_logtype(string_view constant, string& logtype) { - ir::escape_and_append_constant_to_logtype(constant, logtype); - return true; -} - namespace eight_byte_encoding { bool encode_preamble( string_view timestamp_pattern, @@ -206,7 +193,7 @@ namespace eight_byte_encoding { == encode_message_generically( message, logtype, - append_constant_to_logtype, + ir::escape_and_append_const_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) @@ -280,7 +267,7 @@ namespace four_byte_encoding { == encode_message_generically( message, logtype, - append_constant_to_logtype, + ir::escape_and_append_const_to_logtype, encoded_var_handler, DictionaryVariableHandler(ir_buf) )) diff --git a/components/core/src/ffi/search/Subquery.cpp b/components/core/src/ffi/search/Subquery.cpp index 448521c4a..cb594bf08 100644 --- a/components/core/src/ffi/search/Subquery.cpp +++ b/components/core/src/ffi/search/Subquery.cpp @@ -1,5 +1,6 @@ #include "Subquery.hpp" +#include "../../ir/parsing.hpp" #include "QueryWildcard.hpp" using std::string; @@ -9,23 +10,48 @@ using std::vector; namespace ffi::search { template Subquery::Subquery(string logtype_query, Subquery::QueryVariables variables) - : m_logtype_query(std::move(logtype_query)), - m_logtype_query_contains_wildcards(false), - m_query_vars(variables) { - // Determine if the query contains variables - bool is_escaped = false; - for (auto const c : m_logtype_query) { + : m_logtype_query{std::move(logtype_query)}, + m_logtype_query_contains_wildcards{false}, + m_query_vars{std::move(variables)} { + // Determine if the query contains wildcards and record the positions of the + // variable placeholders. + bool is_escaped{false}; + auto const logtype_query_length{m_logtype_query.size()}; + std::vector escaped_placeholder_positions; + escaped_placeholder_positions.reserve(logtype_query_length / 2); + auto const escape_char{enum_to_underlying_type(ir::VariablePlaceholder::Escape)}; + for (size_t idx = 0; idx < logtype_query_length; ++idx) { + char const c{m_logtype_query[idx]}; if (is_escaped) { is_escaped = false; - } else if ('\\' == c) { + if (ir::is_variable_placeholder(c)) { + escaped_placeholder_positions.push_back(idx); + } + } else if (escape_char == c) { is_escaped = true; } else if ((enum_to_underlying_type(WildcardType::ZeroOrMoreChars) == c || enum_to_underlying_type(WildcardType::AnyChar) == c)) { m_logtype_query_contains_wildcards = true; - break; } } + if (false == m_logtype_query_contains_wildcards || escaped_placeholder_positions.empty()) { + return; + } + + // Query contains wildcards and variable placeholders, so we need to add an + // additional escape for each variable placeholder. + std::string double_escaped_logtype_query; + size_t pos{0}; + for (auto const placeholder_pos : escaped_placeholder_positions) { + double_escaped_logtype_query.append(m_logtype_query, pos, placeholder_pos - pos); + double_escaped_logtype_query += escape_char; + pos = placeholder_pos; + } + if (logtype_query_length != pos) { + double_escaped_logtype_query.append(m_logtype_query, pos); + } + m_logtype_query = std::move(double_escaped_logtype_query); } // Explicitly declare specializations to avoid having to validate that the diff --git a/components/core/src/ffi/search/query_methods.cpp b/components/core/src/ffi/search/query_methods.cpp index e343e78fc..932792e8a 100644 --- a/components/core/src/ffi/search/query_methods.cpp +++ b/components/core/src/ffi/search/query_methods.cpp @@ -84,6 +84,17 @@ void generate_subqueries( tokenize_query(wildcard_query, tokens, composite_wildcard_token_indexes); bool all_interpretations_complete = false; + auto escape_handler + = [](string_view constant, size_t char_to_escape_pos, string& logtype) -> void { + auto const next_char_pos{char_to_escape_pos + 1}; + // NOTE: We don't want to add additional escapes for wildcards that have + // been escaped. E.g., the query "\\*" should remain unchanged. + if (ir::is_variable_placeholder(constant[char_to_escape_pos]) + || (next_char_pos < constant.length() && false == is_wildcard(constant[next_char_pos]))) + { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + } + }; string logtype_query; vector, WildcardToken>> query_vars; @@ -93,8 +104,11 @@ void generate_subqueries( size_t constant_begin_pos = 0; for (auto const& token : tokens) { auto begin_pos = std::visit(TokenGetBeginPos, token); - logtype_query - .append(wildcard_query, constant_begin_pos, begin_pos - constant_begin_pos); + ir::append_constant_to_logtype( + wildcard_query.substr(constant_begin_pos, begin_pos - constant_begin_pos), + escape_handler, + logtype_query + ); std::visit( overloaded{ @@ -114,7 +128,11 @@ void generate_subqueries( constant_begin_pos = std::visit(TokenGetEndPos, token); } - logtype_query.append(wildcard_query, constant_begin_pos); + ir::append_constant_to_logtype( + wildcard_query.substr(constant_begin_pos), + escape_handler, + logtype_query + ); // Save sub-query if it's unique bool sub_query_exists = false; diff --git a/components/core/src/ir/parsing.cpp b/components/core/src/ir/parsing.cpp index 940252671..619bc5e5b 100644 --- a/components/core/src/ir/parsing.cpp +++ b/components/core/src/ir/parsing.cpp @@ -4,6 +4,7 @@ #include "../type_utils.hpp" using std::string_view; +using std::string; namespace ir { /* @@ -86,19 +87,16 @@ bool get_bounds_of_next_var(string_view const str, size_t& begin_pos, size_t& en return (msg_length != begin_pos); } -void escape_and_append_constant_to_logtype(string_view constant, std::string& logtype) { - size_t begin_pos = 0; - auto constant_len = constant.length(); - for (size_t i = 0; i < constant_len; ++i) { - auto c = constant[i]; - if (cVariablePlaceholderEscapeCharacter == c || is_variable_placeholder(c)) { - logtype.append(constant, begin_pos, i - begin_pos); - logtype += ir::cVariablePlaceholderEscapeCharacter; - // NOTE: We don't need to append the character of interest - // immediately since the next constant copy operation will get it - begin_pos = i; - } - } - logtype.append(constant, begin_pos, constant_len - begin_pos); +void escape_and_append_const_to_logtype(string_view constant, string& logtype) { + // clang-format off + auto escape_handler = [&]( + [[maybe_unused]] string_view constant, + [[maybe_unused]] size_t char_to_escape_pos, + string& logtype + ) -> void { + logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + }; + // clang-format on + append_constant_to_logtype(constant, escape_handler, logtype); } } // namespace ir diff --git a/components/core/src/ir/parsing.hpp b/components/core/src/ir/parsing.hpp index d4a738366..64229d590 100644 --- a/components/core/src/ir/parsing.hpp +++ b/components/core/src/ir/parsing.hpp @@ -11,16 +11,16 @@ */ #include +#include namespace ir { enum class VariablePlaceholder : char { Integer = 0x11, Dictionary = 0x12, Float = 0x13, + Escape = '\\', }; -constexpr char cVariablePlaceholderEscapeCharacter = '\\'; - /** * Checks if the given character is a delimiter * We treat everything *except* the following quoted characters as a @@ -81,11 +81,32 @@ bool is_var(std::string_view value); bool get_bounds_of_next_var(std::string_view str, size_t& begin_pos, size_t& end_pos); /** - * Appends the given constant to the logtype, escaping any variable placeholders + * Appends a constant to the logtype, escaping any variable placeholders. + * @param constant + * @param logtype +*/ +void escape_and_append_const_to_logtype(std::string_view constant, std::string& logtype); + +/** + * Appends the given constant to the logtype, optionally escaping any variable + * placeholders found within the constant using the given handler. + * @tparam EscapeHandler Method to optionally escape any variable placeholders + * found within the constant. Signature: ( + * [[maybe_unused]] std::string_view constant, + * [[maybe_unused]] size_t char_to_escape_pos, + * std::string& logtype + * ) -> void * @param constant + * @param escape_handler * @param logtype */ -void escape_and_append_constant_to_logtype(std::string_view constant, std::string& logtype); +template +void append_constant_to_logtype( + std::string_view constant, + EscapeHandler escape_handler, + std::string& logtype +); } // namespace ir +#include "parsing.inc" #endif // IR_PARSING_HPP diff --git a/components/core/src/ir/parsing.inc b/components/core/src/ir/parsing.inc new file mode 100644 index 000000000..63e4a5250 --- /dev/null +++ b/components/core/src/ir/parsing.inc @@ -0,0 +1,33 @@ +#ifndef IR_PARSING_INC +#define IR_PARSING_INC + +#include +#include + +#include "../type_utils.hpp" + +namespace ir { +template +void append_constant_to_logtype( + std::string_view constant, + EscapeHandler escape_handler, + std::string& logtype +) { + size_t begin_pos = 0; + auto constant_len = constant.length(); + for (size_t i = 0; i < constant_len; ++i) { + auto const c = constant[i]; + bool const is_escape_char = (enum_to_underlying_type(VariablePlaceholder::Escape) == c); + if (false == is_escape_char && false == is_variable_placeholder(c)) { + continue; + } + logtype.append(constant, begin_pos, i - begin_pos); + // NOTE: We don't need to append the character of interest + // immediately since the next constant copy operation will get it + begin_pos = i; + escape_handler(constant, i, logtype); + } + logtype.append(constant, begin_pos, constant_len - begin_pos); +} +} // namespace ir +#endif diff --git a/components/core/src/streaming_archive/reader/File.cpp b/components/core/src/streaming_archive/reader/File.cpp index 346585fbc..afd0f048c 100644 --- a/components/core/src/streaming_archive/reader/File.cpp +++ b/components/core/src/streaming_archive/reader/File.cpp @@ -212,7 +212,7 @@ namespace streaming_archive::reader { // Get number of variables in logtype const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); auto timestamp = m_timestamps[m_msgs_ix]; if (search_begin_timestamp <= timestamp && timestamp <= search_end_timestamp) { @@ -253,7 +253,7 @@ namespace streaming_archive::reader { // Get number of variables in logtype const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); for (auto sub_query : query.get_relevant_sub_queries()) { // Check if logtype matches search @@ -315,7 +315,7 @@ namespace streaming_archive::reader { // Get variables msg.clear_vars(); const auto& logtype_dictionary_entry = m_archive_logtype_dict->get_entry(logtype_id); - auto num_vars = logtype_dictionary_entry.get_num_vars(); + auto const num_vars = logtype_dictionary_entry.get_num_variables(); if (m_variables_ix + num_vars > m_num_variables) { return false; } diff --git a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp index d1785038f..c6c4d32e4 100644 --- a/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp +++ b/components/core/src/utils/make_dictionaries_readable/make-dictionaries-readable.cpp @@ -66,12 +66,12 @@ int main (int argc, const char* argv[]) { human_readable_value.clear(); size_t constant_begin_pos = 0; - for (size_t var_ix = 0; var_ix < entry.get_num_vars(); ++var_ix) { + for (size_t placeholder_ix = 0; placeholder_ix < entry.get_num_placeholders(); ++placeholder_ix) { ir::VariablePlaceholder var_placeholder; - size_t var_pos = entry.get_var_info(var_ix, var_placeholder); + size_t const placeholder_pos = entry.get_placeholder_info(placeholder_ix, var_placeholder); // Add the constant that's between the last variable and this one, with newlines escaped - human_readable_value.append(value, constant_begin_pos, var_pos - constant_begin_pos); + human_readable_value.append(value, constant_begin_pos, placeholder_pos - constant_begin_pos); switch (var_placeholder) { case ir::VariablePlaceholder::Integer: @@ -83,13 +83,15 @@ int main (int argc, const char* argv[]) { case ir::VariablePlaceholder::Dictionary: human_readable_value += "\\d"; break; + case ir::VariablePlaceholder::Escape: + break; default: SPDLOG_ERROR("Logtype '{}' contains unexpected variable placeholder 0x{:x}", value, enum_to_underlying_type(var_placeholder)); return -1; } // Move past the variable placeholder - constant_begin_pos = var_pos + 1; + constant_begin_pos = placeholder_pos + 1; } // Append remainder of value, if any if (constant_begin_pos < value.length()) { diff --git a/components/core/tests/test-EncodedVariableInterpreter.cpp b/components/core/tests/test-EncodedVariableInterpreter.cpp index c95971204..5e494d022 100644 --- a/components/core/tests/test-EncodedVariableInterpreter.cpp +++ b/components/core/tests/test-EncodedVariableInterpreter.cpp @@ -6,6 +6,7 @@ // Project headers #include "../src/EncodedVariableInterpreter.hpp" +#include "../src/ir/parsing.hpp" #include "../src/streaming_archive/Constants.hpp" using std::string; @@ -235,7 +236,11 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { " and a very large int " + var_strs[1] + " and a double " + var_strs[2] + " and a weird double " + var_strs[3] + - " and a str with numbers " + var_strs[4]; + " and a str with numbers " + var_strs[4] + + " and an escape " + enum_to_underlying_type(ir::VariablePlaceholder::Escape) + + " and an int placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Integer) + + " and a float placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Float) + + " and a dictionary placeholder " + enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); LogTypeDictionaryEntry logtype_dict_entry; EncodedVariableInterpreter::encode_and_add_to_dictionary(msg, logtype_dict_entry, @@ -246,10 +251,10 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { // Test var_ids is correctly populated size_t encoded_var_id_ix = 0; ir::VariablePlaceholder var_placeholder; - for (auto var_ix = 0; var_ix < logtype_dict_entry.get_num_vars(); var_ix++) { - std::ignore = logtype_dict_entry.get_var_info(var_ix, var_placeholder); + for (auto placeholder_ix = 0; placeholder_ix < logtype_dict_entry.get_num_placeholders(); placeholder_ix++) { + std::ignore = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder); if (ir::VariablePlaceholder::Dictionary == var_placeholder) { - auto var = encoded_vars[var_ix]; + auto var = encoded_vars[placeholder_ix]; REQUIRE(var_ids.size() > encoded_var_id_ix); REQUIRE(EncodedVariableInterpreter::decode_var_dict_id(var) == var_ids[encoded_var_id_ix]); @@ -275,6 +280,18 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[3], var_dict_reader, false, search_logtype, sub_query)); search_logtype += " and a str with numbers "; REQUIRE(EncodedVariableInterpreter::encode_and_search_dictionary(var_strs[4], var_dict_reader, false, search_logtype, sub_query)); + search_logtype += " and an escape "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += " and an int placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + search_logtype += " and a float placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float); + search_logtype += " and a dictionary placeholder "; + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + search_logtype += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); auto& vars = sub_query.get_vars(); REQUIRE(vars.size() == encoded_vars.size()); for (size_t i = 0; i < vars.size(); ++i) { @@ -282,7 +299,7 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") { } // Test search for unknown variable - REQUIRE(!EncodedVariableInterpreter::encode_and_search_dictionary("abc123", var_dict_reader, false, search_logtype, sub_query)); + REQUIRE(false == EncodedVariableInterpreter::encode_and_search_dictionary("abc123", var_dict_reader, false, search_logtype, sub_query)); REQUIRE(logtype_dict_entry.get_value() == search_logtype); diff --git a/components/core/tests/test-encoding_methods.cpp b/components/core/tests/test-encoding_methods.cpp index 20999f37b..1b83bdea1 100644 --- a/components/core/tests/test-encoding_methods.cpp +++ b/components/core/tests/test-encoding_methods.cpp @@ -401,11 +401,11 @@ TEMPLATE_TEST_CASE("Encoding messages", "[ffi][encode-message]", eight_byte_enco // Test encoding a message with a variable placeholder after the variables message = " test var123 "; message += enum_to_underlying_type(VariablePlaceholder::Integer); - REQUIRE(false == encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); + REQUIRE(encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); // Test encoding a message with a variable placeholder before a variable message += " var234"; - REQUIRE(false == encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); + REQUIRE(encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); } TEMPLATE_TEST_CASE("wildcard_query_matches_any_encoded_var", diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index 53dc345c4..8467eaeb7 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -464,7 +464,8 @@ TEST_CASE("message_decode_error", "[ffi][decode_next_message]") { // Test if a trailing escape triggers a decoder error auto ir_with_extra_escape{ir_buf}; - ir_with_extra_escape.at(logtype_end_pos - 1) = ir::cVariablePlaceholderEscapeCharacter; + ir_with_extra_escape.at(logtype_end_pos - 1) + = enum_to_underlying_type(VariablePlaceholder::Escape); BufferReader ir_with_extra_escape_buffer{ size_checked_pointer_cast(ir_with_extra_escape.data()), ir_with_extra_escape.size() diff --git a/components/core/tests/test-query_methods.cpp b/components/core/tests/test-query_methods.cpp index ea172ef1c..e43db31d8 100644 --- a/components/core/tests/test-query_methods.cpp +++ b/components/core/tests/test-query_methods.cpp @@ -54,11 +54,72 @@ struct ExpectedSubquery { vector query_var_types; }; +namespace { +/** + * Generates subqueries from a given wildcard query and validates that they + * match the expected subqueries. + * @tparam encoded_var_t + * @param wildcard_query + * @param logtype_query_to_expected_subquery A map from expected logtype queries + * to expected subqueries. + */ +template +void test_generating_subqueries( + std::string const& wildcard_query, + std::unordered_map const& logtype_query_to_expected_subquery +) { + vector> subqueries; + generate_subqueries(wildcard_query, subqueries); + REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); + auto const map_end = logtype_query_to_expected_subquery.cend(); + for (auto const& subquery : subqueries) { + auto const& logtype_query = subquery.get_logtype_query(); + auto const& query_vars = subquery.get_query_vars(); + + auto idx = logtype_query_to_expected_subquery.find(logtype_query); + REQUIRE(map_end != idx); + auto const& expected_subquery = idx->second; + REQUIRE(subquery.logtype_query_contains_wildcards() + == expected_subquery.logtype_query_contains_wildcards); + auto const& expected_var_types = expected_subquery.query_var_types; + REQUIRE(expected_var_types.size() == query_vars.size()); + for (size_t i = 0; i < expected_var_types.size(); ++i) { + auto const& expected_var_type = expected_var_types[i]; + auto const& query_var = query_vars[i]; + + if (expected_var_type.is_exact) { + REQUIRE(std::holds_alternative>(query_var)); + auto const& exact_var = std::get>(query_var); + REQUIRE(expected_var_type.interpretation == exact_var.get_placeholder()); + } else { + REQUIRE(std::holds_alternative>(query_var)); + auto const& wildcard_var = std::get>(query_var); + switch (expected_var_type.interpretation) { + case VariablePlaceholder::Integer: + REQUIRE(TokenType::IntegerVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Float: + REQUIRE(TokenType::FloatVariable + == wildcard_var.get_current_interpretation()); + break; + case VariablePlaceholder::Dictionary: + REQUIRE(TokenType::DictionaryVariable + == wildcard_var.get_current_interpretation()); + break; + default: + REQUIRE(false); + } + } + } + } +} +} // namespace + TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", eight_byte_encoded_variable_t, four_byte_encoded_variable_t) { using TestTypeExactVariableToken = ExactVariableToken; - using TestTypeWildcardVariableToken = WildcardToken; string wildcard_query; vector> subqueries; @@ -95,6 +156,14 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", message += " and a weird double " + var_strs[var_ix++]; message += " and a string with numbers " + var_strs[var_ix++]; message += " and another string with numbers " + var_strs[var_ix++]; + message += " and an escape "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Escape); + message += " and an int placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Integer); + message += " and a float placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Float); + message += " and a dictionary placeholder "; + message += enum_to_underlying_type(ir::VariablePlaceholder::Dictionary); REQUIRE(ffi::encode_message(message, logtype, encoded_vars, dictionary_var_bounds)); wildcard_query = message; @@ -726,50 +795,86 @@ TEMPLATE_TEST_CASE("ffi::search::query_methods", "[ffi][search][query_methods]", expected_subquery); expected_subquery.clear(); - wildcard_query = "*abc*123?456?"; - generate_subqueries(wildcard_query, subqueries); - REQUIRE(subqueries.size() == logtype_query_to_expected_subquery.size()); - const auto map_end = logtype_query_to_expected_subquery.cend(); - for (const auto& subquery : subqueries) { - const auto& logtype_query = subquery.get_logtype_query(); - const auto& query_vars = subquery.get_query_vars(); - - auto idx = logtype_query_to_expected_subquery.find(logtype_query); - REQUIRE(map_end != idx); - const auto& expected_subquery = idx->second; - REQUIRE(subquery.logtype_query_contains_wildcards() - == expected_subquery.logtype_query_contains_wildcards); - const auto& expected_var_types = expected_subquery.query_var_types; - REQUIRE(expected_var_types.size() == query_vars.size()); - for (size_t i = 0; i < expected_var_types.size(); ++i) { - const auto& expected_var_type = expected_var_types[i]; - const auto& query_var = query_vars[i]; - - if (expected_var_type.is_exact) { - REQUIRE(std::holds_alternative(query_var)); - const auto& exact_var = std::get(query_var); - REQUIRE(expected_var_type.interpretation == exact_var.get_placeholder()); - } else { - REQUIRE(std::holds_alternative(query_var)); - const auto& wildcard_var = std::get(query_var); - switch (expected_var_type.interpretation) { - case VariablePlaceholder::Integer: - REQUIRE(TokenType::IntegerVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Float: - REQUIRE(TokenType::FloatVariable - == wildcard_var.get_current_interpretation()); - break; - case VariablePlaceholder::Dictionary: - REQUIRE(TokenType::DictionaryVariable - == wildcard_var.get_current_interpretation()); - break; - default: - REQUIRE(false); - } - } - } - } + test_generating_subqueries("*abc*123?456?", logtype_query_to_expected_subquery); + } + + // In the following wildcard query, `^Q` represents a char with the value of + // VariablePlaceholder::Integer and `^R` represents a char with the value of + // VariablePlaceholder::Dictionary. + SECTION("*escape ^Q placeholders ^R in \\? \\* subqueries*") { + std::string const prefix{"*escape"}; + + std::string const inner_static_text{ + std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Integer) + + " placeholders " + enum_to_underlying_type(VariablePlaceholder::Dictionary) + + " in \\? \\* "}; + std::string const escaped_inner_static_text{ + std::string(" ") + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Integer) + " placeholders " + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Escape) + + enum_to_underlying_type(VariablePlaceholder::Dictionary) + " in \\? \\* "}; + + std::string const postfix{"subqueries*"}; + + std::unordered_map logtype_query_to_expected_subquery; + ExpectedSubquery expected_subquery; + + // In the comments below, \d denotes VariablePlaceholder::Dictionary + // Expected log type: "*escape \\^Q placeholders \\^R in \\? \\* subqueries*" + expected_subquery.logtype_query = prefix; + expected_subquery.logtype_query += escaped_inner_static_text; + expected_subquery.logtype_query += postfix; + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*\d \\^Q placeholders \\^R in \\? \\* subqueries*" + expected_subquery.logtype_query = "*"; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += escaped_inner_static_text; + expected_subquery.logtype_query += postfix; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*escape \\^Q placeholders \\^R in \\? \\* \d*" + expected_subquery.logtype_query = prefix; + expected_subquery.logtype_query += escaped_inner_static_text; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += "*"; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + // Expected log type: "*\d \\^Q placeholders \\^R in \\? \\* \d*" + expected_subquery.logtype_query = "*"; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += escaped_inner_static_text; + expected_subquery.logtype_query += enum_to_underlying_type(VariablePlaceholder::Dictionary); + expected_subquery.logtype_query += "*"; + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.query_var_types.emplace_back(false, VariablePlaceholder::Dictionary); + expected_subquery.logtype_query_contains_wildcards = true; + logtype_query_to_expected_subquery.emplace( + expected_subquery.logtype_query, + expected_subquery + ); + expected_subquery.clear(); + + wildcard_query = prefix + inner_static_text + postfix; + test_generating_subqueries(wildcard_query, logtype_query_to_expected_subquery); } }