From b37bcd5a13dd31e76c3f6db822250396a8c949e2 Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Tue, 30 Jul 2024 18:39:13 -0400 Subject: [PATCH] core-clp: Add `EncodedTextAst` class to represent parsed and encoded unstructured text strings. (#495) --- components/core/CMakeLists.txt | 1 + .../src/clp/EncodedVariableInterpreter.cpp | 9 +-- components/core/src/clp/clg/CMakeLists.txt | 1 + components/core/src/clp/clo/CMakeLists.txt | 1 + components/core/src/clp/clp/CMakeLists.txt | 1 + components/core/src/clp/ir/EncodedTextAst.hpp | 61 +++++++++++++++++++ components/core/src/clp/ir/LogEvent.hpp | 25 +++----- .../core/src/clp/ir/LogEventDeserializer.cpp | 7 ++- .../core/tests/test-ir_encoding_methods.cpp | 2 +- 9 files changed, 84 insertions(+), 24 deletions(-) create mode 100644 components/core/src/clp/ir/EncodedTextAst.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 19c9151bf..32dfa4d99 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -347,6 +347,7 @@ set(SOURCE_FILES_unitTest src/clp/Grep.cpp src/clp/Grep.hpp src/clp/ir/constants.hpp + src/clp/ir/EncodedTextAst.hpp src/clp/ir/LogEvent.hpp src/clp/ir/LogEventDeserializer.cpp src/clp/ir/LogEventDeserializer.hpp diff --git a/components/core/src/clp/EncodedVariableInterpreter.cpp b/components/core/src/clp/EncodedVariableInterpreter.cpp index ad7116bfe..8170f2ddc 100644 --- a/components/core/src/clp/EncodedVariableInterpreter.cpp +++ b/components/core/src/clp/EncodedVariableInterpreter.cpp @@ -234,7 +234,8 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary( size_t& raw_num_bytes ) { logtype_dict_entry.clear(); - logtype_dict_entry.reserve_constant_length(log_event.get_logtype().length()); + auto const& log_message = log_event.get_message(); + logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length()); raw_num_bytes = 0; @@ -284,9 +285,9 @@ void 
EncodedVariableInterpreter::encode_and_add_to_dictionary( }; ffi::ir_stream::generic_decode_message( - log_event.get_logtype(), - log_event.get_encoded_vars(), - log_event.get_dict_vars(), + log_message.get_logtype(), + log_message.get_encoded_vars(), + log_message.get_dict_vars(), constant_handler, encoded_int_handler, encoded_float_handler, diff --git a/components/core/src/clp/clg/CMakeLists.txt b/components/core/src/clp/clg/CMakeLists.txt index bed6c11fc..29b805e87 100644 --- a/components/core/src/clp/clg/CMakeLists.txt +++ b/components/core/src/clp/clg/CMakeLists.txt @@ -33,6 +33,7 @@ set( ../GlobalSQLiteMetadataDB.hpp ../Grep.cpp ../Grep.hpp + ../ir/EncodedTextAst.hpp ../ir/LogEvent.hpp ../ir/parsing.cpp ../ir/parsing.hpp diff --git a/components/core/src/clp/clo/CMakeLists.txt b/components/core/src/clp/clo/CMakeLists.txt index 1eea2b5bb..d8c3fcac1 100644 --- a/components/core/src/clp/clo/CMakeLists.txt +++ b/components/core/src/clp/clo/CMakeLists.txt @@ -34,6 +34,7 @@ set( ../FileWriter.hpp ../Grep.cpp ../Grep.hpp + ../ir/EncodedTextAst.hpp ../ir/LogEvent.hpp ../ir/LogEventSerializer.cpp ../ir/LogEventSerializer.hpp diff --git a/components/core/src/clp/clp/CMakeLists.txt b/components/core/src/clp/clp/CMakeLists.txt index 0f18777d9..ada6680bd 100644 --- a/components/core/src/clp/clp/CMakeLists.txt +++ b/components/core/src/clp/clp/CMakeLists.txt @@ -41,6 +41,7 @@ set( ../GlobalSQLiteMetadataDB.cpp ../GlobalSQLiteMetadataDB.hpp ../ir/constants.hpp + ../ir/EncodedTextAst.hpp ../ir/LogEvent.hpp ../ir/LogEventDeserializer.cpp ../ir/LogEventDeserializer.hpp diff --git a/components/core/src/clp/ir/EncodedTextAst.hpp b/components/core/src/clp/ir/EncodedTextAst.hpp new file mode 100644 index 000000000..2bbf30b4d --- /dev/null +++ b/components/core/src/clp/ir/EncodedTextAst.hpp @@ -0,0 +1,61 @@ +#ifndef CLP_IR_ENCODEDTEXTAST_HPP +#define CLP_IR_ENCODEDTEXTAST_HPP + +#include <string> +#include <utility> +#include <vector> + +#include "types.hpp" + +namespace clp::ir { +/** + * A parsed and encoded 
unstructured text string. + * @tparam encoded_variable_t The type of encoded variables in the string. + */ +template <typename encoded_variable_t> +class EncodedTextAst { +public: + // Constructor + explicit EncodedTextAst( + std::string logtype, + std::vector<std::string> dict_vars, + std::vector<encoded_variable_t> encoded_vars + ) + : m_logtype{std::move(logtype)}, + m_dict_vars{std::move(dict_vars)}, + m_encoded_vars{std::move(encoded_vars)} {} + + // Disable copy constructor and assignment operator + EncodedTextAst(EncodedTextAst const&) = delete; + auto operator=(EncodedTextAst const&) -> EncodedTextAst& = delete; + + // Default move constructor and assignment operator + EncodedTextAst(EncodedTextAst&&) = default; + auto operator=(EncodedTextAst&&) -> EncodedTextAst& = default; + + // Destructor + ~EncodedTextAst() = default; + + // Methods + [[nodiscard]] auto get_logtype() const -> std::string const& { return m_logtype; } + + [[nodiscard]] auto get_dict_vars() const -> std::vector<std::string> const& { + return m_dict_vars; + } + + [[nodiscard]] auto get_encoded_vars() const -> std::vector<encoded_variable_t> const& { + return m_encoded_vars; + } + +private: + // Variables + std::string m_logtype; + std::vector<std::string> m_dict_vars; + std::vector<encoded_variable_t> m_encoded_vars; +}; + +using EightByteEncodedTextAst = EncodedTextAst<eight_byte_encoded_variable_t>; +using FourByteEncodedTextAst = EncodedTextAst<four_byte_encoded_variable_t>; +} // namespace clp::ir + +#endif // CLP_IR_ENCODEDTEXTAST_HPP diff --git a/components/core/src/clp/ir/LogEvent.hpp b/components/core/src/clp/ir/LogEvent.hpp index d32aabb41..4a3ef7567 100644 --- a/components/core/src/clp/ir/LogEvent.hpp +++ b/components/core/src/clp/ir/LogEvent.hpp @@ -2,9 +2,10 @@ #define CLP_IR_LOGEVENT_HPP #include <string> +#include <utility> #include <vector> -#include "../Defs.h" +#include "EncodedTextAst.hpp" #include "time_types.hpp" #include "types.hpp" @@ -20,38 +21,26 @@ class LogEvent { LogEvent( epoch_time_ms_t timestamp, UtcOffset utc_offset, - std::string logtype, - std::vector<std::string> dict_vars, - std::vector<encoded_variable_t> encoded_vars + EncodedTextAst<encoded_variable_t> message ) : m_timestamp{timestamp}, m_utc_offset{utc_offset}, - 
m_logtype{std::move(logtype)}, - m_dict_vars{std::move(dict_vars)}, - m_encoded_vars{std::move(encoded_vars)} {} + m_message{std::move(message)} {} // Methods [[nodiscard]] auto get_timestamp() const -> epoch_time_ms_t { return m_timestamp; } [[nodiscard]] auto get_utc_offset() const -> UtcOffset { return m_utc_offset; } - [[nodiscard]] auto get_logtype() const -> std::string const& { return m_logtype; } - - [[nodiscard]] auto get_dict_vars() const -> std::vector<std::string> const& { - return m_dict_vars; - } - - [[nodiscard]] auto get_encoded_vars() const -> std::vector<encoded_variable_t> const& { - return m_encoded_vars; + [[nodiscard]] auto get_message() const -> EncodedTextAst<encoded_variable_t> const& { + return m_message; } private: // Variables epoch_time_ms_t m_timestamp{0}; UtcOffset m_utc_offset{0}; - std::string m_logtype; - std::vector<std::string> m_dict_vars; - std::vector<encoded_variable_t> m_encoded_vars; + EncodedTextAst<encoded_variable_t> m_message; }; } // namespace clp::ir diff --git a/components/core/src/clp/ir/LogEventDeserializer.cpp b/components/core/src/clp/ir/LogEventDeserializer.cpp index 9e0bf1723..6106568dd 100644 --- a/components/core/src/clp/ir/LogEventDeserializer.cpp +++ b/components/core/src/clp/ir/LogEventDeserializer.cpp @@ -8,6 +8,7 @@ #include "../ffi/ir_stream/decoding_methods.hpp" #include "../ffi/ir_stream/protocol_constants.hpp" +#include "EncodedTextAst.hpp" #include "types.hpp" namespace clp::ir { @@ -124,7 +125,11 @@ auto LogEventDeserializer<encoded_variable_t>::deserialize_log_event( timestamp = m_prev_msg_timestamp; } - return LogEvent<encoded_variable_t>{timestamp, m_utc_offset, logtype, dict_vars, encoded_vars}; + return LogEvent<encoded_variable_t>{ + timestamp, + m_utc_offset, + EncodedTextAst<encoded_variable_t>{logtype, dict_vars, encoded_vars} + }; } // Explicitly declare template specializations so that we can define the template methods in this diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index 4199428ff..e9b161b8a 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ 
b/components/core/tests/test-ir_encoding_methods.cpp @@ -912,7 +912,7 @@ TEMPLATE_TEST_CASE( REQUIRE(log_event.get_utc_offset() == ref_log_event.get_utc_offset()); // We only compare the logtype since decoding messages from logtype + variables is not yet // supported by our public interfaces - REQUIRE(log_event.get_logtype() == encoded_logtypes.at(log_event_idx)); + REQUIRE(log_event.get_message().get_logtype() == encoded_logtypes.at(log_event_idx)); ++log_event_idx; } auto result = log_event_deserializer.deserialize_log_event();