Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LogEventView as a member within LogParser #11

Merged
merged 22 commits into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c8af703
Moved LogEventView into LogParser
SharafMohamed Nov 20, 2023
6e5ac10
Fixed buffer-parser to reset before calling parse() instead of after
SharafMohamed Nov 20, 2023
dbc51b7
Ran outformatter
SharafMohamed Nov 20, 2023
7b493e8
generate_log_event_view_metadata() and parse() are now private
SharafMohamed Nov 22, 2023
4c42268
fixed newline to not print itself as delimiter in LogEventView::get_l…
SharafMohamed Nov 22, 2023
31c46b8
LogEventView uses its constructor instead of init() like previously, …
SharafMohamed Nov 22, 2023
298b12c
Fixed check for multiline logs in metadata
SharafMohamed Nov 22, 2023
e852580
removed obsolete comment
SharafMohamed Nov 22, 2023
a36fecf
removed todo about switching between timestamped and non-timestamped …
SharafMohamed Nov 22, 2023
02acdc8
changed m_log_parser to a const reference
SharafMohamed Nov 22, 2023
cd45f29
reorganized code for printing logtype
SharafMohamed Nov 22, 2023
932bc49
Update src/log_surgeon/ReaderParser.hpp
SharafMohamed Nov 22, 2023
8e445a9
Update src/log_surgeon/LogParser.hpp
SharafMohamed Nov 22, 2023
19713b1
Moved default values to declaration instead of initializer list
SharafMohamed Nov 22, 2023
2ed0559
Update src/log_surgeon/BufferParser.hpp
SharafMohamed Nov 22, 2023
32112d7
moved event view closer to usage
SharafMohamed Nov 22, 2023
043332f
Clarify log-surgeon usage in example CMakeLists.txt
kirkrodrigues Nov 23, 2023
72379f7
Merge branch 'event_view' of https://github.com/SharafMohamed/log-sur…
SharafMohamed Nov 22, 2023
602c809
Update src/log_surgeon/ReaderParser.hpp
SharafMohamed Nov 23, 2023
b87298c
Update src/log_surgeon/LogParser.hpp
SharafMohamed Nov 23, 2023
a8e05ed
removed todo that was fixed in previous PR
SharafMohamed Nov 22, 2023
d3bd032
Remove comment about including log-surgeon in project.
kirkrodrigues Nov 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,8 @@ optional<uint32_t> loglevel_id{parser.get_variable_id("loglevel")};
// <Omitted validation of loglevel_id>

// Create a LogEventView (similar to a string_view)
SharafMohamed marked this conversation as resolved.
Show resolved Hide resolved
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
// Parse the next event
auto err = parser.get_next_event_view(event);
if (ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) {
throw runtime_error("Parsing Failed");
}

Expand All @@ -73,6 +70,7 @@ while (false == parser.done()) {
// Other analysis...

// Print the entire event
LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << event.to_string() << endl;
}
```
Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ endif()
# files.
# For example, if log-surgeon was installed to ~/.local:
# set(log_surgeon_DIR "~/.local/lib/cmake/log_surgeon/")
find_package(log_surgeon REQUIRED)
add_subdirectory(.. log-surgeon-build EXCLUDE_FROM_ALL)
SharafMohamed marked this conversation as resolved.
Show resolved Hide resolved

function(add_to_target target libraries)
target_link_libraries(${target} ${libraries})
Expand Down
12 changes: 6 additions & 6 deletions examples/buffer-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
return;
}

constexpr ssize_t const cSize{4096L * 8}; // 8 pages
constexpr ssize_t const cSize{4096L * 8}; // 8 pages
vector<char> buf(cSize);
infs.read(buf.data(), cSize);
ssize_t valid_size{infs.gcount()};
bool input_done{false};
if (infs.eof()) {
input_done = true;
}
parser.reset();

cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl;

vector<LogEvent> multiline_logs;
size_t offset{0};
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
if (ErrorCode err{
parser.get_next_event_view(buf.data(), valid_size, offset, event, input_done)};
ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event(buf.data(), valid_size, offset, input_done)};
ErrorCode::Success != err)
{
// The only expected error is the parser has read to the bound
// of the buffer.
if (ErrorCode::BufferOutOfBounds != err) {
Expand Down Expand Up @@ -75,14 +75,14 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
continue;
}

LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << "log: " << event.to_string() << endl;
print_timestamp_loglevel(event, *loglevel_id);
cout << "logtype: " << event.get_logtype() << endl;
if (event.is_multiline()) {
multiline_logs.emplace_back(event);
}
}
parser.reset();

cout << endl << "# Printing multiline logs:" << endl;
for (auto const& log : multiline_logs) {
Expand Down
4 changes: 2 additions & 2 deletions examples/reader-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl;

vector<LogEvent> multiline_logs;
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
if (ErrorCode err{parser.get_next_event_view(event)}; ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) {
throw runtime_error("Parsing Failed");
}

LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << "log: " << event.to_string() << endl;
print_timestamp_loglevel(event, *loglevel_id);
cout << "logtype: " << event.get_logtype() << endl;
Expand Down
34 changes: 7 additions & 27 deletions src/log_surgeon/BufferParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,24 @@ auto BufferParser::reset() -> void {
m_done = false;
}

auto BufferParser::get_next_event_view(
auto BufferParser::parse_next_event(
char* buf,
size_t size,
size_t& offset,
LogEventView& event_view,
bool finished_reading_input
) -> ErrorCode {
event_view.reset();
m_log_parser.reset_log_event_view();
// TODO in order to allow logs/tokens to wrap user buffers this function
// will need more parameters or the input buffer may need to be exposed to
// the user
m_log_parser.set_input_buffer(buf, size, offset, finished_reading_input);
LogParser::ParsingAction parsing_action{LogParser::ParsingAction::None};
ErrorCode error_code = m_log_parser.parse(event_view.m_log_output_buffer, parsing_action);
ErrorCode error_code = m_log_parser.parse_and_generate_metadata(parsing_action);
if (ErrorCode::Success != error_code) {
if (0 != event_view.m_log_output_buffer->pos()) {
offset = event_view.m_log_output_buffer->get_token(0).m_start_pos;
if (0 != m_log_parser.get_log_event_view().m_log_output_buffer->pos()) {
offset = m_log_parser.get_log_event_view()
.m_log_output_buffer->get_token(0)
.m_start_pos;
}
reset();
return error_code;
Expand All @@ -42,27 +43,6 @@ auto BufferParser::get_next_event_view(
m_done = true;
}
offset = m_log_parser.get_input_pos();

uint32_t start = 0;
if (false == event_view.m_log_output_buffer->has_timestamp()) {
start = 1;
}
uint32_t first_newline_pos{0};
for (uint32_t i = start; i < event_view.m_log_output_buffer->pos(); i++) {
Token* token = &event_view.m_log_output_buffer->get_mutable_token(i);
event_view.add_token(token->m_type_ids_ptr->at(0), token);
if (token->m_type_ids_ptr->at(0) == (int)SymbolID::TokenNewlineId && first_newline_pos == 0)
{
first_newline_pos = i;
}
}
// To be a multiline log there must be at least one token between the
// newline token and the last token in the output buffer.
if (event_view.m_log_output_buffer->has_timestamp() && 0 < first_newline_pos
&& first_newline_pos + 1 < event_view.m_log_output_buffer->pos())
{
event_view.set_multiline(true);
}
return ErrorCode::Success;
}
} // namespace log_surgeon
19 changes: 7 additions & 12 deletions src/log_surgeon/BufferParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class BufferParser {

/**
* Clears the internal state of the log parser (lexer and input buffer) so
* that the next call to get_next_event_view will begin parsing from
* that the next call to parse_next_event will begin parsing from
* scratch. This is an alternative to constructing a new Parser that would
* require rebuilding the LogParser (generating a new lexer and input
* buffer). This should be called whenever you mutate the input buffer, but
Expand All @@ -50,14 +50,13 @@ class BufferParser {
* bytes between offset and size may contain a partial log event. It is the
* user's responsibility to preserve these bytes when mutating the buffer
* to contain more of the log event before the next call of
* get_next_log_view.
* parse_next_event. The result is stored internally and is only valid if
* ErrorCode::Success is returned.
* @param buf The byte buffer containing raw log events to be parsed.
* @param size The size of the buffer.
* @param offset The starting position in the buffer of the current log
* event to be parsed. Updated to be the starting position of the next
* unparsed log event. If no log event is parsed it remains unchanged.
* @param event_view Populated with the log event view parsed from the
* buffer. Only valid if ErrorCode::Success is returned.
* @param finished_reading_input Indicates if the end of the buffer is the
* end of input and therefore the end of the final log event.
* @return ErrorCode::Success if a log event is successfully parsed as a
Expand All @@ -67,13 +66,9 @@ class BufferParser {
* internally before this method returns.
* @return ErrorCode from LogParser::parse.
*/
auto get_next_event_view(
char* buf,
size_t size,
size_t& offset,
LogEventView& event_view,
bool finished_reading_input = false
) -> ErrorCode;
auto
parse_next_event(char* buf, size_t size, size_t& offset, bool finished_reading_input = false)
-> ErrorCode;

/**
* @return The underlying LogParser.
Expand All @@ -93,7 +88,7 @@ class BufferParser {
/**
* @return true when the BufferParser has completed parsing all of the
* provided input. This can only occur if finished_reading_input was set to
* true in get_next_event_view. Otherwise, the BufferParser will always
* true in parse_next_event. Otherwise, the BufferParser will always
* assume more input can be read.
*/
auto done() const -> bool { return m_done; }
Expand Down
6 changes: 5 additions & 1 deletion src/log_surgeon/LogEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,16 @@ auto LogEventView::reset() -> void {
return raw_log;
}

auto LogEventView::get_logtype() -> std::string {
auto LogEventView::get_logtype() const -> std::string {
std::string logtype;
for (uint32_t i = 1; i < m_log_output_buffer->pos(); i++) {
Token& token = m_log_output_buffer->get_mutable_token(i);
if (token.m_type_ids_ptr->at(0) == (int)log_surgeon::SymbolID::TokenUncaughtStringID) {
logtype += token.to_string_view();
} else if (token.m_type_ids_ptr->at(0) == (int)log_surgeon::SymbolID::TokenNewlineId) {
logtype += "<";
logtype += m_log_parser->get_id_symbol(token.m_type_ids_ptr->at(0));
logtype += ">";
SharafMohamed marked this conversation as resolved.
Show resolved Hide resolved
} else {
logtype += token.get_delimiter();
logtype += "<";
Expand Down
8 changes: 4 additions & 4 deletions src/log_surgeon/LogEvent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include <string>
#include <vector>

#include <log_surgeon/LogParser.hpp>
#include <log_surgeon/LogParserOutputBuffer.hpp>
#include <log_surgeon/Token.hpp>

namespace log_surgeon {
class LogParser;
class LogEvent;

/**
Expand Down Expand Up @@ -99,7 +99,7 @@ class LogEventView {
* events from the same logging source code may have the same logtype.
* @return The logtype of the log.
*/
auto get_logtype() -> std::string;
auto get_logtype() const -> std::string;

/**
* Adds a Token to the array of tokens of a particular token type.
Expand All @@ -121,8 +121,8 @@ class LogEventView {

private:
bool m_multiline{false};
LogParser const* m_log_parser;
std::vector<std::vector<Token*>> m_log_var_occurrences;
LogParser const* m_log_parser{nullptr};
SharafMohamed marked this conversation as resolved.
Show resolved Hide resolved
std::vector<std::vector<Token*>> m_log_var_occurrences{};
};

/**
Expand Down
39 changes: 34 additions & 5 deletions src/log_surgeon/LogParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ using finite_automata::RegexNFAByteState;
LogParser::LogParser(string const& schema_file_path)
: LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path).get()) {}

LogParser::LogParser(SchemaAST const* schema_ast) : m_has_start_of_log(false) {
LogParser::LogParser(SchemaAST const* schema_ast) {
add_rules(schema_ast);
m_lexer.generate();
m_log_event_view = make_unique<LogEventView>(this);
}

auto LogParser::add_delimiters(unique_ptr<ParserAST> const& delimiters) -> void {
Expand Down Expand Up @@ -154,13 +155,19 @@ auto LogParser::reset() -> void {
m_lexer.prepend_start_of_file_char(m_input_buffer);
}

// Runs parse() and, only when it reports success, generates the metadata of
// the internal log event view. The ErrorCode from parse() is returned
// unchanged in both cases.
auto LogParser::parse_and_generate_metadata(LogParser::ParsingAction& parsing_action) -> ErrorCode {
    ErrorCode const parse_result = parse(parsing_action);
    if (ErrorCode::Success != parse_result) {
        // Metadata is not generated for an unsuccessful parse.
        return parse_result;
    }
    generate_log_event_view_metadata();
    return parse_result;
}

// TODO: if the first text is a variable in the no timestamp case you lose the
// first character to static text since it has no leading delim
// TODO: switching between timestamped and non-timestamped logs
auto LogParser::parse(
std::unique_ptr<LogParserOutputBuffer>& output_buffer,
LogParser::ParsingAction& parsing_action
) -> ErrorCode {
auto LogParser::parse(LogParser::ParsingAction& parsing_action) -> ErrorCode {
std::unique_ptr<LogParserOutputBuffer>& output_buffer = m_log_event_view->m_log_output_buffer;
if (0 == output_buffer->pos()) {
output_buffer->set_has_delimiters(m_lexer.get_has_delimiters());
Token next_token;
Expand Down Expand Up @@ -278,4 +285,26 @@ auto LogParser::get_symbol_id(std::string const& symbol) const -> std::optional<
auto LogParser::get_next_symbol(Token& token) -> ErrorCode {
return m_lexer.scan(m_input_buffer, token);
}

auto LogParser::generate_log_event_view_metadata() -> void {
uint32_t start = 0;
if (false == m_log_event_view->m_log_output_buffer->has_timestamp()) {
start = 1;
}
uint32_t first_newline_pos{0};
for (uint32_t i = start; i < m_log_event_view->m_log_output_buffer->pos(); i++) {
Token* token = &m_log_event_view->m_log_output_buffer->get_mutable_token(i);
m_log_event_view->add_token(token->m_type_ids_ptr->at(0), token);
if (token->get_delimiter() == "\n" && first_newline_pos == 0) {
first_newline_pos = i;
}
}
// To be a multiline log there must be at least one token between the
// newline token and the last token in the output buffer.
if (m_log_event_view->m_log_output_buffer->has_timestamp() && 0 < first_newline_pos
&& first_newline_pos + 1 < m_log_event_view->m_log_output_buffer->pos())
{
m_log_event_view->set_multiline(true);
}
}
} // namespace log_surgeon
39 changes: 32 additions & 7 deletions src/log_surgeon/LogParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/LALR1Parser.hpp>
#include <log_surgeon/LogEvent.hpp>
#include <log_surgeon/LogParserOutputBuffer.hpp>
#include <log_surgeon/Parser.hpp>
#include <log_surgeon/ParserInputBuffer.hpp>
Expand Down Expand Up @@ -47,16 +48,13 @@ class LogParser
auto reset() -> void;

/**
* Parses the input buffer until a complete log event has been parsed and
* its tokens are stored into output_buffer.
* @param output_buffer Buffer to write Token objects to as they are parsed.
* Parses and generates metadata if parse was successful.
* @param parsing_action Returns the action for CLP to take by reference.
* @return ErrorCode::Success if successfully parsed to the start of a new
* log event.
* @return ErrorCode from LogParser::get_next_symbol.
* @return ErrorCode from LogParser::parse.
*/
auto parse(std::unique_ptr<LogParserOutputBuffer>& output_buffer, ParsingAction& parsing_action)
-> ErrorCode;
auto parse_and_generate_metadata(ParsingAction& parsing_action) -> ErrorCode;

// TODO protect against invalid id (use optional)
/**
Expand Down Expand Up @@ -111,7 +109,33 @@ class LogParser
*/
auto increase_capacity() -> void { m_lexer.increase_buffer_capacity(m_input_buffer); }

/**
* Resets the log event view held by this parser (by calling
* LogEventView::reset on it) to prepare for the next parse.
*/
auto reset_log_event_view() -> void { m_log_event_view->reset(); }

/**
* @return A const reference to the log event view held by this parser,
* reflecting the last parse. The same view object is reset by
* reset_log_event_view() and written to by subsequent parses, so its
* contents change across those calls.
*/
auto get_log_event_view() const -> LogEventView const& { return *m_log_event_view; }

private:
/**
* Parses the input buffer until a complete log event has been parsed and
* its tokens are stored into the output buffer of the internal log event view.
SharafMohamed marked this conversation as resolved.
Show resolved Hide resolved
* @param parsing_action Returns the action for CLP to take by reference.
* @return ErrorCode::Success if successfully parsed to the start of a new
* log event.
* @return ErrorCode from LogParser::get_next_symbol.
*/
auto parse(ParsingAction& parsing_action) -> ErrorCode;

/**
* Generates metadata for last parsed log event indicating occurrences of
* each variable and if the log event is multiline
*/
auto generate_log_event_view_metadata() -> void;

/**
* Requests the next token from the lexer.
* @param token is populated with the next token found by the parser.
Expand All @@ -138,8 +162,9 @@ class LogParser

// TODO: move ownership of the buffer to the lexer
ParserInputBuffer m_input_buffer;
bool m_has_start_of_log;
bool m_has_start_of_log{false};
Token m_start_of_log_message{};
std::unique_ptr<LogEventView> m_log_event_view{nullptr};
};
} // namespace log_surgeon

Expand Down
Loading