diff --git a/README.md b/README.md index 9611382f..a687ec05 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,8 @@ parser.reset_and_set_reader(reader); optional loglevel_id{parser.get_variable_id("loglevel")}; // -// Create a LogEventView (similar to a string_view) -LogEventView event{&parser.get_log_parser()}; while (false == parser.done()) { - // Parse the next event - auto err = parser.get_next_event_view(event); - if (ErrorCode::Success != err) { + if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) { throw runtime_error("Parsing Failed"); } @@ -73,6 +69,7 @@ while (false == parser.done()) { // Other analysis... // Print the entire event + LogEventView const& event = parser.get_log_parser().get_log_event_view(); cout << event->to_string() << endl; } ``` diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 61c8be8b..c1ed0c92 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -7,12 +7,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE) endif() -# If log-surgeon was not installed to the system library path, set -# log_surgeon_DIR to the installed location containing the cmake configuration -# files. -# For example, if log-surgeon was installed to ~/.local: -# set(log_surgeon_DIR "~/.local/lib/cmake/log_surgeon/") -find_package(log_surgeon REQUIRED) +add_subdirectory(.. 
log-surgeon-build EXCLUDE_FROM_ALL) function(add_to_target target libraries) target_link_libraries(${target} ${libraries}) diff --git a/examples/buffer-parser.cpp b/examples/buffer-parser.cpp index 11b30c83..fc2ce987 100644 --- a/examples/buffer-parser.cpp +++ b/examples/buffer-parser.cpp @@ -27,7 +27,7 @@ auto process_logs(string& schema_path, string const& input_path) -> void { return; } - constexpr ssize_t const cSize{4096L * 8}; // 8 pages + constexpr ssize_t const cSize{4096L * 8}; // 8 pages vector buf(cSize); infs.read(buf.data(), cSize); ssize_t valid_size{infs.gcount()}; @@ -35,16 +35,16 @@ auto process_logs(string& schema_path, string const& input_path) -> void { if (infs.eof()) { input_done = true; } + parser.reset(); cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl; vector multiline_logs; size_t offset{0}; - LogEventView event{&parser.get_log_parser()}; while (false == parser.done()) { - if (ErrorCode err{ - parser.get_next_event_view(buf.data(), valid_size, offset, event, input_done)}; - ErrorCode::Success != err) { + if (ErrorCode err{parser.parse_next_event(buf.data(), valid_size, offset, input_done)}; + ErrorCode::Success != err) + { // The only expected error is the parser has read to the bound // of the buffer. 
if (ErrorCode::BufferOutOfBounds != err) { @@ -75,6 +75,7 @@ auto process_logs(string& schema_path, string const& input_path) -> void { continue; } + LogEventView const& event = parser.get_log_parser().get_log_event_view(); cout << "log: " << event.to_string() << endl; print_timestamp_loglevel(event, *loglevel_id); cout << "logtype: " << event.get_logtype() << endl; @@ -82,7 +83,6 @@ auto process_logs(string& schema_path, string const& input_path) -> void { multiline_logs.emplace_back(event); } } - parser.reset(); cout << endl << "# Printing multiline logs:" << endl; for (auto const& log : multiline_logs) { diff --git a/examples/reader-parser.cpp b/examples/reader-parser.cpp index ba83a325..71a5a3da 100644 --- a/examples/reader-parser.cpp +++ b/examples/reader-parser.cpp @@ -39,12 +39,12 @@ auto process_logs(string& schema_path, string const& input_path) -> void { cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl; vector multiline_logs; - LogEventView event{&parser.get_log_parser()}; while (false == parser.done()) { - if (ErrorCode err{parser.get_next_event_view(event)}; ErrorCode::Success != err) { + if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) { throw runtime_error("Parsing Failed"); } + LogEventView const& event = parser.get_log_parser().get_log_event_view(); cout << "log: " << event.to_string() << endl; print_timestamp_loglevel(event, *loglevel_id); cout << "logtype: " << event.get_logtype() << endl; diff --git a/src/log_surgeon/BufferParser.cpp b/src/log_surgeon/BufferParser.cpp index 29a06233..e792f9a1 100644 --- a/src/log_surgeon/BufferParser.cpp +++ b/src/log_surgeon/BufferParser.cpp @@ -17,23 +17,24 @@ auto BufferParser::reset() -> void { m_done = false; } -auto BufferParser::get_next_event_view( +auto BufferParser::parse_next_event( char* buf, size_t size, size_t& offset, - LogEventView& event_view, bool finished_reading_input ) -> ErrorCode { - event_view.reset(); + 
m_log_parser.reset_log_event_view(); // TODO in order to allow logs/tokens to wrap user buffers this function // will need more parameters or the input buffer may need to be exposed to // the user m_log_parser.set_input_buffer(buf, size, offset, finished_reading_input); LogParser::ParsingAction parsing_action{LogParser::ParsingAction::None}; - ErrorCode error_code = m_log_parser.parse(event_view.m_log_output_buffer, parsing_action); + ErrorCode error_code = m_log_parser.parse_and_generate_metadata(parsing_action); if (ErrorCode::Success != error_code) { - if (0 != event_view.m_log_output_buffer->pos()) { - offset = event_view.m_log_output_buffer->get_token(0).m_start_pos; + if (0 != m_log_parser.get_log_event_view().m_log_output_buffer->pos()) { + offset = m_log_parser.get_log_event_view() + .m_log_output_buffer->get_token(0) + .m_start_pos; } reset(); return error_code; @@ -42,27 +43,6 @@ auto BufferParser::get_next_event_view( m_done = true; } offset = m_log_parser.get_input_pos(); - - uint32_t start = 0; - if (false == event_view.m_log_output_buffer->has_timestamp()) { - start = 1; - } - uint32_t first_newline_pos{0}; - for (uint32_t i = start; i < event_view.m_log_output_buffer->pos(); i++) { - Token* token = &event_view.m_log_output_buffer->get_mutable_token(i); - event_view.add_token(token->m_type_ids_ptr->at(0), token); - if (token->m_type_ids_ptr->at(0) == (int)SymbolID::TokenNewlineId && first_newline_pos == 0) - { - first_newline_pos = i; - } - } - // To be a multiline log there must be at least one token between the - // newline token and the last token in the output buffer. 
- if (event_view.m_log_output_buffer->has_timestamp() && 0 < first_newline_pos - && first_newline_pos + 1 < event_view.m_log_output_buffer->pos()) - { - event_view.set_multiline(true); - } return ErrorCode::Success; } } // namespace log_surgeon diff --git a/src/log_surgeon/BufferParser.hpp b/src/log_surgeon/BufferParser.hpp index 8b4a9eb9..e6ca2b88 100644 --- a/src/log_surgeon/BufferParser.hpp +++ b/src/log_surgeon/BufferParser.hpp @@ -36,7 +36,7 @@ class BufferParser { /** * Clears the internal state of the log parser (lexer and input buffer) so - * that the next call to get_next_event_view will begin parsing from + * that the next call to parse_next_event will begin parsing from * scratch. This is an alternative to constructing a new Parser that would * require rebuilding the LogParser (generating a new lexer and input * buffer). This should be called whenever you mutate the input buffer, but @@ -50,14 +50,13 @@ class BufferParser { * bytes between offset and size may contain a partial log event. It is the * user's responsibility to preserve these bytes when mutating the buffer * to contain more of the log event before the next call of - * get_next_log_view. + * parse_next_event. The result is stored internally and is only valid if + * ErrorCode::Success is returned. * @param buf The byte buffer containing raw log events to be parsed. * @param size The size of the buffer. * @param offset The starting position in the buffer of the current log * event to be parsed. Updated to be the starting position of the next * unparsed log event. If no log event is parsed it remains unchanged. - * @param event_view Populated with the log event view parsed from the - * buffer. Only valid if ErrorCode::Success is returned. * @param finished_reading_input Indicates if the end of the buffer is the * end of input and therefore the end of the final log event. 
* @return ErrorCode::Success if a log event is successfully parsed as a @@ -67,13 +66,9 @@ class BufferParser { * internally before this method returns. * @return ErrorCode from LogParser::parse. */ - auto get_next_event_view( - char* buf, - size_t size, - size_t& offset, - LogEventView& event_view, - bool finished_reading_input = false - ) -> ErrorCode; + auto + parse_next_event(char* buf, size_t size, size_t& offset, bool finished_reading_input = false) + -> ErrorCode; /** * @return The underlying LogParser. @@ -93,7 +88,7 @@ class BufferParser { /** * @return true when the BufferParser has completed parsing all of the * provided input. This can only occur if finished_reading_input was set to - * true in get_next_event_view. Otherwise, the BufferParser will always + * true in parse_next_event. Otherwise, the BufferParser will always * assume more input can be read. */ auto done() const -> bool { return m_done; } diff --git a/src/log_surgeon/LogEvent.cpp b/src/log_surgeon/LogEvent.cpp index c76208c4..f2db5830 100644 --- a/src/log_surgeon/LogEvent.cpp +++ b/src/log_surgeon/LogEvent.cpp @@ -10,9 +10,9 @@ #include namespace log_surgeon { -LogEventView::LogEventView(LogParser const* log_parser) +LogEventView::LogEventView(LogParser const& log_parser) : m_log_parser{log_parser}, - m_log_var_occurrences{log_parser->m_lexer.m_id_symbol.size()} { + m_log_var_occurrences{log_parser.m_lexer.m_id_symbol.size()} { m_log_output_buffer = std::make_unique(); } @@ -48,16 +48,18 @@ auto LogEventView::reset() -> void { return raw_log; } -auto LogEventView::get_logtype() -> std::string { +auto LogEventView::get_logtype() const -> std::string { std::string logtype; for (uint32_t i = 1; i < m_log_output_buffer->pos(); i++) { Token& token = m_log_output_buffer->get_mutable_token(i); if (token.m_type_ids_ptr->at(0) == (int)log_surgeon::SymbolID::TokenUncaughtStringID) { logtype += token.to_string_view(); } else { - logtype += token.get_delimiter(); + if 
((int)log_surgeon::SymbolID::TokenNewlineId != token.m_type_ids_ptr->at(0)) { + logtype += token.get_delimiter(); + } logtype += "<"; - logtype += m_log_parser->get_id_symbol(token.m_type_ids_ptr->at(0)); + logtype += m_log_parser.get_id_symbol(token.m_type_ids_ptr->at(0)); logtype += ">"; } } diff --git a/src/log_surgeon/LogEvent.hpp b/src/log_surgeon/LogEvent.hpp index 08922b1a..3dc97dcd 100644 --- a/src/log_surgeon/LogEvent.hpp +++ b/src/log_surgeon/LogEvent.hpp @@ -5,11 +5,11 @@ #include #include -#include #include #include namespace log_surgeon { +class LogParser; class LogEvent; /** @@ -27,7 +27,7 @@ class LogEventView { * @param log_parser The LogParser whose input buffer the view will * reference */ - explicit LogEventView(LogParser const* log_parser); + explicit LogEventView(LogParser const& log_parser); /** * Copies the tokens representing a log event from the source buffer. This @@ -57,7 +57,7 @@ class LogEventView { /** * @return The LogParser whose input buffer this LogEventView references */ - [[nodiscard]] auto get_log_parser() const -> LogParser const* { return m_log_parser; } + [[nodiscard]] auto get_log_parser() const -> LogParser const& { return m_log_parser; } /** * @return The LogParserOutputBuffer containing the tokens that make up the @@ -99,7 +99,7 @@ class LogEventView { * events from the same logging source code may have the same logtype. * @return The logtype of the log. */ - auto get_logtype() -> std::string; + auto get_logtype() const -> std::string; /** * Adds a Token to the array of tokens of a particular token type. 
@@ -121,8 +121,8 @@ class LogEventView { private: bool m_multiline{false}; - LogParser const* m_log_parser; - std::vector> m_log_var_occurrences; + LogParser const& m_log_parser; + std::vector> m_log_var_occurrences{}; }; /** diff --git a/src/log_surgeon/LogParser.cpp b/src/log_surgeon/LogParser.cpp index 2d26fb98..40b5a0b8 100644 --- a/src/log_surgeon/LogParser.cpp +++ b/src/log_surgeon/LogParser.cpp @@ -30,9 +30,10 @@ using finite_automata::RegexNFAByteState; LogParser::LogParser(string const& schema_file_path) : LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path).get()) {} -LogParser::LogParser(SchemaAST const* schema_ast) : m_has_start_of_log(false) { +LogParser::LogParser(SchemaAST const* schema_ast) { add_rules(schema_ast); m_lexer.generate(); + m_log_event_view = make_unique(*this); } auto LogParser::add_delimiters(unique_ptr const& delimiters) -> void { @@ -154,13 +155,16 @@ auto LogParser::reset() -> void { m_lexer.prepend_start_of_file_char(m_input_buffer); } -// TODO: if the first text is a variable in the no timestamp case you lose the -// first character to static text since it has no leading delim -// TODO: switching between timestamped and non-timestamped logs -auto LogParser::parse( - std::unique_ptr& output_buffer, - LogParser::ParsingAction& parsing_action -) -> ErrorCode { +auto LogParser::parse_and_generate_metadata(LogParser::ParsingAction& parsing_action) -> ErrorCode { + ErrorCode error_code = parse(parsing_action); + if (ErrorCode::Success == error_code) { + generate_log_event_view_metadata(); + } + return error_code; +} + +auto LogParser::parse(LogParser::ParsingAction& parsing_action) -> ErrorCode { + std::unique_ptr& output_buffer = m_log_event_view->m_log_output_buffer; if (0 == output_buffer->pos()) { output_buffer->set_has_delimiters(m_lexer.get_has_delimiters()); Token next_token; @@ -278,4 +282,26 @@ auto LogParser::get_symbol_id(std::string const& symbol) const -> std::optional< auto 
LogParser::get_next_symbol(Token& token) -> ErrorCode { return m_lexer.scan(m_input_buffer, token); } + +auto LogParser::generate_log_event_view_metadata() -> void { + uint32_t start = 0; + if (false == m_log_event_view->m_log_output_buffer->has_timestamp()) { + start = 1; + } + uint32_t first_newline_pos{0}; + for (uint32_t i = start; i < m_log_event_view->m_log_output_buffer->pos(); i++) { + Token* token = &m_log_event_view->m_log_output_buffer->get_mutable_token(i); + m_log_event_view->add_token(token->m_type_ids_ptr->at(0), token); + if (token->get_delimiter() == "\n" && first_newline_pos == 0) { + first_newline_pos = i; + } + } + // To be a multiline log there must be at least one token between the + // newline token and the last token in the output buffer. + if (m_log_event_view->m_log_output_buffer->has_timestamp() && 0 < first_newline_pos + && first_newline_pos + 1 < m_log_event_view->m_log_output_buffer->pos()) + { + m_log_event_view->set_multiline(true); + } +} } // namespace log_surgeon diff --git a/src/log_surgeon/LogParser.hpp b/src/log_surgeon/LogParser.hpp index 5d503306..74a3280f 100644 --- a/src/log_surgeon/LogParser.hpp +++ b/src/log_surgeon/LogParser.hpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -47,16 +48,13 @@ class LogParser auto reset() -> void; /** - * Parses the input buffer until a complete log event has been parsed and - * its tokens are stored into output_buffer. - * @param output_buffer Buffer to write Token objects to as they are parsed. + * Parses and generates metadata if parse was successful. * @param parsing_action Returns the action for CLP to take by reference. * @return ErrorCode::Success if successfully parsed to the start of a new * log event. - * @return ErrorCode from LogParser::get_next_symbol. + * @return ErrorCode from LogParser::parse. 
*/ - auto parse(std::unique_ptr& output_buffer, ParsingAction& parsing_action) - -> ErrorCode; + auto parse_and_generate_metadata(ParsingAction& parsing_action) -> ErrorCode; // TODO protect against invalid id (use optional) /** @@ -111,7 +109,33 @@ class LogParser */ auto increase_capacity() -> void { m_lexer.increase_buffer_capacity(m_input_buffer); } + /** + * Resets the log event view to prepare for the next parse + */ + auto reset_log_event_view() -> void { m_log_event_view->reset(); } + + /** + * @return the log event view based on the last parse + */ + auto get_log_event_view() const -> LogEventView const& { return *m_log_event_view; } + private: + /** + * Parses the input buffer until a complete log event has been parsed and + * its tokens are stored into m_log_event_view. + * @param parsing_action Returns the action for CLP to take by reference. + * @return ErrorCode::Success if successfully parsed to the start of a new + * log event. + * @return ErrorCode from LogParser::get_next_symbol. + */ + auto parse(ParsingAction& parsing_action) -> ErrorCode; + + /** + * Generates metadata for last parsed log event indicating occurrences of + * each variable and if the log event is multiline + */ + auto generate_log_event_view_metadata() -> void; + /** * Requests the next token from the lexer. * @param token is populated with the next token found by the parser. 
@@ -138,8 +162,9 @@ class LogParser // TODO: move ownership of the buffer to the lexer ParserInputBuffer m_input_buffer; - bool m_has_start_of_log; + bool m_has_start_of_log{false}; Token m_start_of_log_message{}; + std::unique_ptr m_log_event_view{nullptr}; }; } // namespace log_surgeon diff --git a/src/log_surgeon/ReaderParser.cpp b/src/log_surgeon/ReaderParser.cpp index 84ced346..105a19d4 100644 --- a/src/log_surgeon/ReaderParser.cpp +++ b/src/log_surgeon/ReaderParser.cpp @@ -17,8 +17,8 @@ auto ReaderParser::reset_and_set_reader(Reader& reader) -> void { m_reader = reader; } -auto ReaderParser::get_next_event_view(LogEventView& event_view) -> ErrorCode { - event_view.reset(); +auto ReaderParser::parse_next_event() -> ErrorCode { + m_log_parser.reset_log_event_view(); if (ErrorCode err = m_log_parser.read_into_input(m_reader); ErrorCode::Success != err && ErrorCode::EndOfFile != err) { @@ -26,7 +26,7 @@ auto ReaderParser::get_next_event_view(LogEventView& event_view) -> ErrorCode { } while (true) { LogParser::ParsingAction parsing_action{LogParser::ParsingAction::None}; - ErrorCode parse_error = m_log_parser.parse(event_view.m_log_output_buffer, parsing_action); + ErrorCode parse_error = m_log_parser.parse_and_generate_metadata(parsing_action); if (ErrorCode::Success == parse_error) { if (LogParser::ParsingAction::CompressAndFinish == parsing_action) { m_done = true; @@ -44,26 +44,6 @@ auto ReaderParser::get_next_event_view(LogEventView& event_view) -> ErrorCode { return parse_error; } } - uint32_t start = 0; - if (false == event_view.m_log_output_buffer->has_timestamp()) { - start = 1; - } - uint32_t first_newline_pos{0}; - for (uint32_t i = start; i < event_view.m_log_output_buffer->pos(); i++) { - Token* token = &event_view.m_log_output_buffer->get_mutable_token(i); - event_view.add_token(token->m_type_ids_ptr->at(0), token); - if (token->m_type_ids_ptr->at(0) == (int)SymbolID::TokenNewlineId && first_newline_pos == 0) - { - first_newline_pos = i; - } - } - // 
To be a multiline log there must be at least one token between the - // newline token and the last token in the output buffer. - if (event_view.m_log_output_buffer->has_timestamp() && 0 < first_newline_pos - && first_newline_pos + 1 < event_view.m_log_output_buffer->pos()) - { - event_view.set_multiline(true); - } return ErrorCode::Success; } } // namespace log_surgeon diff --git a/src/log_surgeon/ReaderParser.hpp b/src/log_surgeon/ReaderParser.hpp index 938977a8..1cf5efa9 100644 --- a/src/log_surgeon/ReaderParser.hpp +++ b/src/log_surgeon/ReaderParser.hpp @@ -36,7 +36,7 @@ class ReaderParser { /** * Clears the internal state of the log parser (lexer and input buffer), * and sets the reader containing the logs to be parsed. The next call to - * get_next_event_view will begin parsing from scratch. This is an + * parse_next_event will begin parsing from scratch. This is an * alternative to constructing a new Parser that would require rebuilding * the LogParser (generating a new lexer and input buffer). This should be * called whenever new input is needed. @@ -47,16 +47,15 @@ class ReaderParser { /** * Attempts to parse the next log event from the internal `Reader`. Users * should add their own error handling and tracking logic to Reader::read, - * in order to retrieve IO errors. - * @param event_view Populated with the LogEventView parsed from the reader. - * Only valid if ErrorCode::Success is returned. + * in order to retrieve IO errors. The result is stored internally and is + * only valid if ErrorCode::Success is returned. * @return ErrorCode::Success if a log event is successfully parsed as a * LogEventView. * @return ErrorCode from LogParser::parse. * @return ErrorCode from the user defined Reader::read. * @throw std::bad_alloc if a log event is large enough to exhaust memory. */ - auto get_next_event_view(LogEventView& event_view) -> ErrorCode; + auto parse_next_event() -> ErrorCode; /** * @return The underlying LogParser.