Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect logtype encoding by properly escaping variable placeholder characters (fixes #163). #162

Merged
merged 32 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c91f625
Move parsing methods from ffi to ir namespace.
kirkrodrigues Sep 16, 2023
d0779ba
Replace existing parsing methods with those from ir namespace.
kirkrodrigues Sep 17, 2023
e2474fa
Improve performance of parsing methods from ffi to match those in CLP.
kirkrodrigues Sep 17, 2023
1c920b4
Fix build error
kirkrodrigues Sep 17, 2023
aa1d327
C++ 101: Inline methods should be defined in the header.
kirkrodrigues Sep 17, 2023
7fd4656
Replace LogTypeDictionaryEntry::VarDelim with ir::VariablePlaceholder…
kirkrodrigues Sep 17, 2023
22dc820
Apply suggestions from code review
kirkrodrigues Sep 17, 2023
10e339a
Add suggestion from code review with formatting.
kirkrodrigues Sep 17, 2023
22bfa84
Fix dictionary collision for compression/decompression
LinZhihao-723 Sep 18, 2023
e73d284
Merge
LinZhihao-723 Sep 18, 2023
7d6d749
Fix the generated log type
LinZhihao-723 Sep 18, 2023
8ebf5de
Fix var tracking
LinZhihao-723 Sep 18, 2023
5963bbd
Add missing const
LinZhihao-723 Sep 18, 2023
6199eec
Fix search
LinZhihao-723 Sep 18, 2023
40f02b0
Track the number of escape placeholders when parsing the logtype in t…
LinZhihao-723 Sep 18, 2023
a72d52f
Don't unescape logtypes when ingesting IR log events.
kirkrodrigues Sep 19, 2023
251b8a7
Double escape when building the search logtype
LinZhihao-723 Sep 23, 2023
b61e324
Refactor the code to reduce the inner branching
LinZhihao-723 Sep 24, 2023
dde3dfb
Apply suggestions from code review
LinZhihao-723 Nov 29, 2023
0fe414d
Undo parsing.hpp
LinZhihao-723 Nov 29, 2023
37eba2c
Rename escape_var
LinZhihao-723 Nov 29, 2023
eb8e9cb
Refactor member variable init
LinZhihao-723 Nov 29, 2023
d44862d
Refactoring the place to do the variabel initializations
LinZhihao-723 Nov 29, 2023
0ab0024
Fix the variable placeholder
LinZhihao-723 Nov 29, 2023
8c77fbb
Removing cVariablePlaceholder
LinZhihao-723 Nov 29, 2023
bf95c9a
Adding a case to the unit test
LinZhihao-723 Nov 29, 2023
469c2b3
Refactor the escape function
LinZhihao-723 Nov 30, 2023
62363ec
Fix subquery
LinZhihao-723 Nov 30, 2023
7c3189f
Fix ffi and search
LinZhihao-723 Nov 30, 2023
ad48240
Delete components/core/src/ffi/ir_stream/Attribute.hpp
LinZhihao-723 Dec 1, 2023
a37f90c
Bug-fix: Apply correct escaping for wildcard queries; Some refactoring.
kirkrodrigues Dec 5, 2023
07d6d33
Merge branch 'main' into dict_collision
LinZhihao-723 Dec 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Set general compressor
set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor")
set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd)
Expand Down Expand Up @@ -253,6 +255,7 @@ set(SOURCE_FILES_clp
src/ir/LogEventDeserializer.hpp
src/ir/parsing.cpp
src/ir/parsing.hpp
src/ir/parsing.inc
src/ir/utils.cpp
src/ir/utils.hpp
src/LibarchiveFileReader.cpp
Expand Down Expand Up @@ -436,6 +439,7 @@ set(SOURCE_FILES_clg
src/ir/LogEvent.hpp
src/ir/parsing.cpp
src/ir/parsing.hpp
src/ir/parsing.inc
src/LogTypeDictionaryEntry.cpp
src/LogTypeDictionaryEntry.hpp
src/LogTypeDictionaryReader.cpp
Expand Down Expand Up @@ -594,6 +598,7 @@ set(SOURCE_FILES_clo
src/ir/LogEvent.hpp
src/ir/parsing.cpp
src/ir/parsing.hpp
src/ir/parsing.inc
src/LogTypeDictionaryEntry.cpp
src/LogTypeDictionaryEntry.hpp
src/LogTypeDictionaryReader.cpp
Expand Down Expand Up @@ -797,6 +802,7 @@ set(SOURCE_FILES_unitTest
src/ir/LogEventDeserializer.hpp
src/ir/parsing.cpp
src/ir/parsing.hpp
src/ir/parsing.inc
src/ir/utils.cpp
src/ir/utils.hpp
src/LibarchiveFileReader.cpp
Expand Down
25 changes: 14 additions & 11 deletions components/core/src/EncodedVariableInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary(
encoded_vars.push_back(encoded_var);
};

ffi::ir_stream::generic_decode_message(
ffi::ir_stream::generic_decode_message<false>(
log_event.get_logtype(),
log_event.get_encoded_vars(),
log_event.get_dict_vars(),
Expand All @@ -287,38 +287,41 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary(
bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDictionaryEntry& logtype_dict_entry, const VariableDictionaryReader& var_dict,
const vector<encoded_variable_t>& encoded_vars, string& decompressed_msg)
{
size_t num_vars_in_logtype = logtype_dict_entry.get_num_vars();
size_t const num_placeholders_in_logtype = logtype_dict_entry.get_num_placeholders();
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
size_t const num_vars = logtype_dict_entry.get_num_variables();
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved

// Ensure the number of variables in the logtype matches the number of encoded variables given
const auto& logtype_value = logtype_dict_entry.get_value();
if (num_vars_in_logtype != encoded_vars.size()) {
if (num_vars != encoded_vars.size()) {
SPDLOG_ERROR("EncodedVariableInterpreter: Logtype '{}' contains {} variables, but {} were given for decoding.", logtype_value.c_str(),
num_vars_in_logtype, encoded_vars.size());
num_vars, encoded_vars.size());
return false;
}

ir::VariablePlaceholder var_placeholder;
size_t constant_begin_pos = 0;
string float_str;
variable_dictionary_id_t var_dict_id;
for (size_t i = 0; i < num_vars_in_logtype; ++i) {
size_t var_position = logtype_dict_entry.get_var_info(i, var_placeholder);
for (size_t placeholder_ix = 0, var_ix = 0; placeholder_ix < num_placeholders_in_logtype; ++placeholder_ix) {
size_t placeholder_position = logtype_dict_entry.get_placeholder_info(placeholder_ix, var_placeholder);

// Add the constant that's between the last variable and this one
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
decompressed_msg.append(logtype_value, constant_begin_pos,
var_position - constant_begin_pos);
placeholder_position - constant_begin_pos);
switch (var_placeholder) {
case ir::VariablePlaceholder::Integer:
decompressed_msg += std::to_string(encoded_vars[i]);
decompressed_msg += std::to_string(encoded_vars[var_ix++]);
break;
case ir::VariablePlaceholder::Float:
convert_encoded_float_to_string(encoded_vars[i], float_str);
convert_encoded_float_to_string(encoded_vars[var_ix++], float_str);
decompressed_msg += float_str;
break;
case ir::VariablePlaceholder::Dictionary:
var_dict_id = decode_var_dict_id(encoded_vars[i]);
var_dict_id = decode_var_dict_id(encoded_vars[var_ix++]);
decompressed_msg += var_dict.get_value(var_dict_id);
break;
case ir::VariablePlaceholder::Escape:
break;
default:
SPDLOG_ERROR(
"EncodedVariableInterpreter: Logtype '{}' contains "
Expand All @@ -328,7 +331,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message (const LogTypeDic
return false;
}
// Move past the variable placeholder
constant_begin_pos = var_position + 1;
constant_begin_pos = placeholder_position + 1;
}
// Append remainder of logtype, if any
if (constant_begin_pos < logtype_value.length()) {
Expand Down
12 changes: 9 additions & 3 deletions components/core/src/Grep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv
string logtype;
for (const auto& query_token : query_tokens) {
// Append from end of last token to beginning of this token, to logtype
logtype.append(processed_search_string, last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos);
ir::escape_and_append_constant_to_logtype<true>(
static_cast<std::string_view>(processed_search_string).substr(last_token_end_pos, query_token.get_begin_pos() - last_token_end_pos),
logtype
);
last_token_end_pos = query_token.get_end_pos();

if (query_token.is_wildcard()) {
Expand All @@ -358,7 +361,7 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv
}
} else {
if (!query_token.is_var()) {
logtype += query_token.get_value();
ir::escape_and_append_constant_to_logtype<true>(query_token.get_value(), logtype);
} else if (!process_var_token(query_token, archive, ignore_case, sub_query, logtype)) {
return SubQueryMatchabilityResult::WontMatch;
}
Expand All @@ -367,7 +370,10 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv

if (last_token_end_pos < processed_search_string.length()) {
// Append from end of last token to end
logtype.append(processed_search_string, last_token_end_pos, string::npos);
ir::escape_and_append_constant_to_logtype<true>(
static_cast<std::string_view>(processed_search_string).substr(last_token_end_pos, string::npos),
logtype
);
last_token_end_pos = processed_search_string.length();
}

Expand Down
64 changes: 25 additions & 39 deletions components/core/src/LogTypeDictionaryEntry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@

using std::string;

size_t LogTypeDictionaryEntry::get_var_info(
size_t LogTypeDictionaryEntry::get_placeholder_info(
size_t var_ix,
ir::VariablePlaceholder& var_placeholder
) const {
if (var_ix >= m_var_positions.size()) {
if (var_ix >= m_placeholder_positions.size()) {
return SIZE_MAX;
}

auto var_position = m_var_positions[var_ix];
auto var_position = m_placeholder_positions[var_ix];
var_placeholder = static_cast<ir::VariablePlaceholder>(m_value[var_position]);

return m_var_positions[var_ix];
return m_placeholder_positions[var_ix];
}

size_t LogTypeDictionaryEntry::get_data_size () const {
// NOTE: sizeof(vector[0]) is executed at compile time so there's no risk of an exception at runtime
return sizeof(m_id) + m_value.length() + m_var_positions.size() * sizeof(m_var_positions[0]) +
return sizeof(m_id) + m_value.length() + m_placeholder_positions.size() * sizeof(m_placeholder_positions[0]) +
m_ids_of_segments_containing_entry.size() * sizeof(segment_id_t);
}

Expand All @@ -32,49 +32,56 @@ void LogTypeDictionaryEntry::add_constant (const string& value_containing_consta
}

void LogTypeDictionaryEntry::add_dictionary_var () {
m_var_positions.push_back(m_value.length());
m_placeholder_positions.push_back(m_value.length());
add_dict_var(m_value);
}

void LogTypeDictionaryEntry::add_int_var () {
m_var_positions.push_back(m_value.length());
m_placeholder_positions.push_back(m_value.length());
add_int_var(m_value);
}

void LogTypeDictionaryEntry::add_float_var () {
m_var_positions.push_back(m_value.length());
m_placeholder_positions.push_back(m_value.length());
add_float_var(m_value);
}

void LogTypeDictionaryEntry::add_escape_var() {
m_placeholder_positions.push_back(m_value.length());
add_escape_var(m_value);
++m_num_escape_placeholders;
}

bool LogTypeDictionaryEntry::parse_next_var (const string& msg, size_t& var_begin_pos, size_t& var_end_pos, string& var) {
auto last_var_end_pos = var_end_pos;
if (ir::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) {
// Append to log type: from end of last variable to start of current variable
add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos);
auto constant = std::string_view(msg).substr(last_var_end_pos, var_begin_pos - last_var_end_pos);
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions);

var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos);
return true;
}
if (last_var_end_pos < msg.length()) {
// Append to log type: from end of last variable to end
add_constant(msg, last_var_end_pos, msg.length() - last_var_end_pos);
auto constant = std::string_view(msg).substr(last_var_end_pos, msg.length() - last_var_end_pos);
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
m_num_escape_placeholders += ir::escape_and_append_constant_to_logtype_with_tracking(constant, m_value, m_placeholder_positions);
}

return false;
}

void LogTypeDictionaryEntry::clear () {
m_value.clear();
m_var_positions.clear();
m_placeholder_positions.clear();
m_num_escape_placeholders = 0;
}

void LogTypeDictionaryEntry::write_to_file (streaming_compression::Compressor& compressor) const {
compressor.write_numeric_value(m_id);

string escaped_value;
get_value_with_unfounded_variables_escaped(escaped_value);
compressor.write_numeric_value<uint64_t>(escaped_value.length());
compressor.write_string(escaped_value);
compressor.write_numeric_value<uint64_t>(m_value.length());
compressor.write_string(m_value);
}

ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Decompressor& decompressor) {
Expand Down Expand Up @@ -109,6 +116,9 @@ ErrorCode LogTypeDictionaryEntry::try_read_from_file (streaming_compression::Dec
is_escaped = false;
} else if (ir::cVariablePlaceholderEscapeCharacter == c) {
is_escaped = true;
add_constant(constant, 0, constant.length());
constant.clear();
add_escape_var();
} else {
if (enum_to_underlying_type(ir::VariablePlaceholder::Integer) == c) {
add_constant(constant, 0, constant.length());
Expand Down Expand Up @@ -141,27 +151,3 @@ void LogTypeDictionaryEntry::read_from_file (streaming_compression::Decompressor
throw OperationFailed(error_code, __FILENAME__, __LINE__);
}
}

void LogTypeDictionaryEntry::get_value_with_unfounded_variables_escaped (string& escaped_logtype_value) const {
auto value_view = static_cast<std::string_view>(m_value);
size_t begin_ix = 0;
// Reset escaped value and reserve enough space to at least contain the whole value
escaped_logtype_value.clear();
escaped_logtype_value.reserve(value_view.length());
for (auto var_position : m_var_positions) {
size_t end_ix = var_position;

ir::escape_and_append_constant_to_logtype(
value_view.substr(begin_ix, end_ix - begin_ix),
escaped_logtype_value
);

// Add variable placeholder
escaped_logtype_value += value_view[end_ix];

// Move begin to start of next portion of logtype between variables
begin_ix = end_ix + 1;
}
// Escape any variable placeholders in remainder of value
ir::escape_and_append_constant_to_logtype(value_view.substr(begin_ix), escaped_logtype_value);
}
42 changes: 29 additions & 13 deletions components/core/src/LogTypeDictionaryEntry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class LogTypeDictionaryEntry : public DictionaryEntry<logtype_dictionary_id_t> {
};

// Constructors
LogTypeDictionaryEntry () = default;
LogTypeDictionaryEntry () : m_num_escape_placeholders{0} {};
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
// Use default copy constructor
LogTypeDictionaryEntry (const LogTypeDictionaryEntry&) = default;

Expand Down Expand Up @@ -63,15 +63,34 @@ class LogTypeDictionaryEntry : public DictionaryEntry<logtype_dictionary_id_t> {
static void add_float_var (std::string& logtype) {
logtype += enum_to_underlying_type(ir::VariablePlaceholder::Float);
}
/**
* Adds an escape variable placeholder to the given logtype
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
* @param logtype
*/
static void add_escape_var (std::string& logtype) {
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
logtype += enum_to_underlying_type(ir::VariablePlaceholder::Escape);
}

size_t get_num_vars () const { return m_var_positions.size(); }
/**
* Gets all info about a variable in the logtype
* @return The total number of placeholders on record. The placeholders may
* represent an escape characters.
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
*/
size_t get_num_placeholders () const { return m_placeholder_positions.size(); }

/**
* @return The total number of variables on record.
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
*/
size_t get_num_variables () const {
return m_placeholder_positions.size() - m_num_escape_placeholders;
}

/**
* Gets all info about a placeholder in the logtype
* @param var_ix The index of the variable to get the info for
* @param var_placeholder
* @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds
*/
size_t get_var_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const;
size_t get_placeholder_info (size_t var_ix, ir::VariablePlaceholder& var_placeholder) const;

/**
* Gets the size (in-memory) of the data contained in this entry
Expand All @@ -98,6 +117,10 @@ class LogTypeDictionaryEntry : public DictionaryEntry<logtype_dictionary_id_t> {
* Adds a dictionary variable placeholder
*/
void add_dictionary_var ();
/**
* Adds an escape variable placeholder
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
*/
void add_escape_var ();
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved

/**
* Parses next variable from a message, constructing the constant part of the message's logtype as well
Expand Down Expand Up @@ -137,16 +160,9 @@ class LogTypeDictionaryEntry : public DictionaryEntry<logtype_dictionary_id_t> {
void read_from_file (streaming_compression::Decompressor& decompressor);

private:
// Methods
/**
* Escapes any variable placeholders that don't correspond to the positions
* of variables in the logtype entry's value
* @param escaped_logtype_value
*/
void get_value_with_unfounded_variables_escaped (std::string& escaped_logtype_value) const;

// Variables
std::vector<size_t> m_var_positions;
std::vector<size_t> m_placeholder_positions;
size_t m_num_escape_placeholders;
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
};

#endif // LOGTYPEDICTIONARYENTRY_HPP
2 changes: 1 addition & 1 deletion components/core/src/ffi/ir_stream/decoding_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ generic_decode_next_message(ReaderInterface& reader, string& message, epoch_time
auto dict_var_handler = [&](string const& dict_var) { message.append(dict_var); };

try {
generic_decode_message(
generic_decode_message<true>(
logtype,
encoded_vars,
dict_vars,
Expand Down
3 changes: 3 additions & 0 deletions components/core/src/ffi/ir_stream/decoding_methods.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ auto deserialize_ir_message(
/**
* Decodes the IR message calls the given methods to handle each component of
* the message
* @tparam unescape_logtype Whether to remove the escape characters from the
* logtype before calling \p ConstantHandler
* @tparam encoded_variable_t Type of the encoded variable
* @tparam ConstantHandler Method to handle constants in the logtype.
* Signature: (const std::string&, size_t, size_t) -> void
Expand All @@ -93,6 +95,7 @@ auto deserialize_ir_message(
* @throw DecodingException if the message can not be decoded properly
*/
template <
bool unescape_logtype,
typename encoded_variable_t,
typename ConstantHandler,
typename EncodedIntHandler,
Expand Down
Loading