Skip to content

Commit

Permalink
Change LogParser to output to an internal LogEventView. (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed authored Nov 23, 2023
1 parent 05b5d87 commit 895f464
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 112 deletions.
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,8 @@ parser.reset_and_set_reader(reader);
optional<uint32_t> loglevel_id{parser.get_variable_id("loglevel")};
// <Omitted validation of loglevel_id>

// Create a LogEventView (similar to a string_view)
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
// Parse the next event
auto err = parser.get_next_event_view(event);
if (ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) {
throw runtime_error("Parsing Failed");
}

Expand All @@ -73,6 +69,7 @@ while (false == parser.done()) {
// Other analysis...

// Print the entire event
LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << event.to_string() << endl;
}
```
Expand Down
7 changes: 1 addition & 6 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
endif()

# If log-surgeon was not installed to the system library path, set
# log_surgeon_DIR to the installed location containing the cmake configuration
# files.
# For example, if log-surgeon was installed to ~/.local:
# set(log_surgeon_DIR "~/.local/lib/cmake/log_surgeon/")
find_package(log_surgeon REQUIRED)
add_subdirectory(.. log-surgeon-build EXCLUDE_FROM_ALL)

function(add_to_target target libraries)
target_link_libraries(${target} ${libraries})
Expand Down
12 changes: 6 additions & 6 deletions examples/buffer-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
return;
}

constexpr ssize_t const cSize{4096L * 8}; // 8 pages
constexpr ssize_t const cSize{4096L * 8}; // 8 pages
vector<char> buf(cSize);
infs.read(buf.data(), cSize);
ssize_t valid_size{infs.gcount()};
bool input_done{false};
if (infs.eof()) {
input_done = true;
}
parser.reset();

cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl;

vector<LogEvent> multiline_logs;
size_t offset{0};
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
if (ErrorCode err{
parser.get_next_event_view(buf.data(), valid_size, offset, event, input_done)};
ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event(buf.data(), valid_size, offset, input_done)};
ErrorCode::Success != err)
{
// The only expected error is the parser has read to the bound
// of the buffer.
if (ErrorCode::BufferOutOfBounds != err) {
Expand Down Expand Up @@ -75,14 +75,14 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
continue;
}

LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << "log: " << event.to_string() << endl;
print_timestamp_loglevel(event, *loglevel_id);
cout << "logtype: " << event.get_logtype() << endl;
if (event.is_multiline()) {
multiline_logs.emplace_back(event);
}
}
parser.reset();

cout << endl << "# Printing multiline logs:" << endl;
for (auto const& log : multiline_logs) {
Expand Down
4 changes: 2 additions & 2 deletions examples/reader-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ auto process_logs(string& schema_path, string const& input_path) -> void {
cout << "# Parsing timestamp and loglevel for each log event in " << input_path << ":" << endl;

vector<LogEvent> multiline_logs;
LogEventView event{&parser.get_log_parser()};
while (false == parser.done()) {
if (ErrorCode err{parser.get_next_event_view(event)}; ErrorCode::Success != err) {
if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) {
throw runtime_error("Parsing Failed");
}

LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << "log: " << event.to_string() << endl;
print_timestamp_loglevel(event, *loglevel_id);
cout << "logtype: " << event.get_logtype() << endl;
Expand Down
34 changes: 7 additions & 27 deletions src/log_surgeon/BufferParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,24 @@ auto BufferParser::reset() -> void {
m_done = false;
}

auto BufferParser::get_next_event_view(
auto BufferParser::parse_next_event(
char* buf,
size_t size,
size_t& offset,
LogEventView& event_view,
bool finished_reading_input
) -> ErrorCode {
event_view.reset();
m_log_parser.reset_log_event_view();
// TODO in order to allow logs/tokens to wrap user buffers this function
// will need more parameters or the input buffer may need to be exposed to
// the user
m_log_parser.set_input_buffer(buf, size, offset, finished_reading_input);
LogParser::ParsingAction parsing_action{LogParser::ParsingAction::None};
ErrorCode error_code = m_log_parser.parse(event_view.m_log_output_buffer, parsing_action);
ErrorCode error_code = m_log_parser.parse_and_generate_metadata(parsing_action);
if (ErrorCode::Success != error_code) {
if (0 != event_view.m_log_output_buffer->pos()) {
offset = event_view.m_log_output_buffer->get_token(0).m_start_pos;
if (0 != m_log_parser.get_log_event_view().m_log_output_buffer->pos()) {
offset = m_log_parser.get_log_event_view()
.m_log_output_buffer->get_token(0)
.m_start_pos;
}
reset();
return error_code;
Expand All @@ -42,27 +43,6 @@ auto BufferParser::get_next_event_view(
m_done = true;
}
offset = m_log_parser.get_input_pos();

uint32_t start = 0;
if (false == event_view.m_log_output_buffer->has_timestamp()) {
start = 1;
}
uint32_t first_newline_pos{0};
for (uint32_t i = start; i < event_view.m_log_output_buffer->pos(); i++) {
Token* token = &event_view.m_log_output_buffer->get_mutable_token(i);
event_view.add_token(token->m_type_ids_ptr->at(0), token);
if (token->m_type_ids_ptr->at(0) == (int)SymbolID::TokenNewlineId && first_newline_pos == 0)
{
first_newline_pos = i;
}
}
// To be a multiline log there must be at least one token between the
// newline token and the last token in the output buffer.
if (event_view.m_log_output_buffer->has_timestamp() && 0 < first_newline_pos
&& first_newline_pos + 1 < event_view.m_log_output_buffer->pos())
{
event_view.set_multiline(true);
}
return ErrorCode::Success;
}
} // namespace log_surgeon
19 changes: 7 additions & 12 deletions src/log_surgeon/BufferParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class BufferParser {

/**
* Clears the internal state of the log parser (lexer and input buffer) so
* that the next call to get_next_event_view will begin parsing from
* that the next call to parse_next_event will begin parsing from
* scratch. This is an alternative to constructing a new Parser that would
* require rebuilding the LogParser (generating a new lexer and input
* buffer). This should be called whenever you mutate the input buffer, but
Expand All @@ -50,14 +50,13 @@ class BufferParser {
* bytes between offset and size may contain a partial log event. It is the
* user's responsibility to preserve these bytes when mutating the buffer
* to contain more of the log event before the next call of
* get_next_log_view.
* parse_next_event. The result is stored internally and is only valid if
* ErrorCode::Success is returned.
* @param buf The byte buffer containing raw log events to be parsed.
* @param size The size of the buffer.
* @param offset The starting position in the buffer of the current log
* event to be parsed. Updated to be the starting position of the next
* unparsed log event. If no log event is parsed it remains unchanged.
* @param event_view Populated with the log event view parsed from the
* buffer. Only valid if ErrorCode::Success is returned.
* @param finished_reading_input Indicates if the end of the buffer is the
* end of input and therefore the end of the final log event.
* @return ErrorCode::Success if a log event is successfully parsed as a
Expand All @@ -67,13 +66,9 @@ class BufferParser {
* internally before this method returns.
* @return ErrorCode from LogParser::parse.
*/
auto get_next_event_view(
char* buf,
size_t size,
size_t& offset,
LogEventView& event_view,
bool finished_reading_input = false
) -> ErrorCode;
auto
parse_next_event(char* buf, size_t size, size_t& offset, bool finished_reading_input = false)
-> ErrorCode;

/**
* @return The underlying LogParser.
Expand All @@ -93,7 +88,7 @@ class BufferParser {
/**
* @return true when the BufferParser has completed parsing all of the
* provided input. This can only occur if finished_reading_input was set to
* true in get_next_event_view. Otherwise, the BufferParser will always
* true in parse_next_event. Otherwise, the BufferParser will always
* assume more input can be read.
*/
auto done() const -> bool { return m_done; }
Expand Down
12 changes: 7 additions & 5 deletions src/log_surgeon/LogEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#include <log_surgeon/Token.hpp>

namespace log_surgeon {
LogEventView::LogEventView(LogParser const* log_parser)
LogEventView::LogEventView(LogParser const& log_parser)
: m_log_parser{log_parser},
m_log_var_occurrences{log_parser->m_lexer.m_id_symbol.size()} {
m_log_var_occurrences{log_parser.m_lexer.m_id_symbol.size()} {
m_log_output_buffer = std::make_unique<LogParserOutputBuffer>();
}

Expand Down Expand Up @@ -48,16 +48,18 @@ auto LogEventView::reset() -> void {
return raw_log;
}

auto LogEventView::get_logtype() -> std::string {
auto LogEventView::get_logtype() const -> std::string {
std::string logtype;
for (uint32_t i = 1; i < m_log_output_buffer->pos(); i++) {
Token& token = m_log_output_buffer->get_mutable_token(i);
if (token.m_type_ids_ptr->at(0) == (int)log_surgeon::SymbolID::TokenUncaughtStringID) {
logtype += token.to_string_view();
} else {
logtype += token.get_delimiter();
if ((int)log_surgeon::SymbolID::TokenNewlineId != token.m_type_ids_ptr->at(0)) {
logtype += token.get_delimiter();
}
logtype += "<";
logtype += m_log_parser->get_id_symbol(token.m_type_ids_ptr->at(0));
logtype += m_log_parser.get_id_symbol(token.m_type_ids_ptr->at(0));
logtype += ">";
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/log_surgeon/LogEvent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include <string>
#include <vector>

#include <log_surgeon/LogParser.hpp>
#include <log_surgeon/LogParserOutputBuffer.hpp>
#include <log_surgeon/Token.hpp>

namespace log_surgeon {
class LogParser;
class LogEvent;

/**
Expand All @@ -27,7 +27,7 @@ class LogEventView {
* @param log_parser The LogParser whose input buffer the view will
* reference
*/
explicit LogEventView(LogParser const* log_parser);
explicit LogEventView(LogParser const& log_parser);

/**
* Copies the tokens representing a log event from the source buffer. This
Expand Down Expand Up @@ -57,7 +57,7 @@ class LogEventView {
/**
* @return The LogParser whose input buffer this LogEventView references
*/
[[nodiscard]] auto get_log_parser() const -> LogParser const* { return m_log_parser; }
[[nodiscard]] auto get_log_parser() const -> LogParser const& { return m_log_parser; }

/**
* @return The LogParserOutputBuffer containing the tokens that make up the
Expand Down Expand Up @@ -99,7 +99,7 @@ class LogEventView {
* events from the same logging source code may have the same logtype.
* @return The logtype of the log.
*/
auto get_logtype() -> std::string;
auto get_logtype() const -> std::string;

/**
* Adds a Token to the array of tokens of a particular token type.
Expand All @@ -121,8 +121,8 @@ class LogEventView {

private:
bool m_multiline{false};
LogParser const* m_log_parser;
std::vector<std::vector<Token*>> m_log_var_occurrences;
LogParser const& m_log_parser;
std::vector<std::vector<Token*>> m_log_var_occurrences{};
};

/**
Expand Down
42 changes: 34 additions & 8 deletions src/log_surgeon/LogParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ using finite_automata::RegexNFAByteState;
// Constructs a LogParser from a schema file path by delegating to the
// SchemaAST-based constructor, passing the raw pointer obtained from the
// result of SchemaParser::try_schema_file(schema_file_path).
LogParser::LogParser(string const& schema_file_path)
: LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path).get()) {}

LogParser::LogParser(SchemaAST const* schema_ast) : m_has_start_of_log(false) {
LogParser::LogParser(SchemaAST const* schema_ast) {
add_rules(schema_ast);
m_lexer.generate();
m_log_event_view = make_unique<LogEventView>(*this);
}

auto LogParser::add_delimiters(unique_ptr<ParserAST> const& delimiters) -> void {
Expand Down Expand Up @@ -154,13 +155,16 @@ auto LogParser::reset() -> void {
m_lexer.prepend_start_of_file_char(m_input_buffer);
}

// TODO: if the first text is a variable in the no timestamp case you lose the
// first character to static text since it has no leading delim
// TODO: switching between timestamped and non-timestamped logs
auto LogParser::parse(
std::unique_ptr<LogParserOutputBuffer>& output_buffer,
LogParser::ParsingAction& parsing_action
) -> ErrorCode {
/**
 * Runs parse() and, only when it reports ErrorCode::Success, generates the
 * metadata for the internal log event view.
 * @param parsing_action Forwarded to parse().
 * @return The ErrorCode returned by parse().
 */
auto LogParser::parse_and_generate_metadata(LogParser::ParsingAction& parsing_action) -> ErrorCode {
    ErrorCode const parse_result{parse(parsing_action)};
    if (ErrorCode::Success != parse_result) {
        // Parsing failed, so there is no complete event to describe.
        return parse_result;
    }
    generate_log_event_view_metadata();
    return parse_result;
}

auto LogParser::parse(LogParser::ParsingAction& parsing_action) -> ErrorCode {
std::unique_ptr<LogParserOutputBuffer>& output_buffer = m_log_event_view->m_log_output_buffer;
if (0 == output_buffer->pos()) {
output_buffer->set_has_delimiters(m_lexer.get_has_delimiters());
Token next_token;
Expand Down Expand Up @@ -278,4 +282,26 @@ auto LogParser::get_symbol_id(std::string const& symbol) const -> std::optional<
// Scans the next token from the parser's input buffer by forwarding to
// m_lexer.scan.
// @param token Passed to the lexer's scan call.
// @return The ErrorCode returned by m_lexer.scan.
auto LogParser::get_next_symbol(Token& token) -> ErrorCode {
return m_lexer.scan(m_input_buffer, token);
}

/**
 * Fills in the metadata of the internal log event view from the tokens
 * currently held in its output buffer: every processed token is registered
 * under its first type id, and the view is flagged as multiline when the
 * buffer has a timestamp and a newline-delimited token is followed by at least
 * one more token.
 */
auto LogParser::generate_log_event_view_metadata() -> void {
    auto& output_buffer = *m_log_event_view->m_log_output_buffer;

    // Token 0 is only processed when the buffer reports having a timestamp.
    uint32_t const first_token_idx = output_buffer.has_timestamp() ? 0U : 1U;

    // 0 doubles as the "no newline found yet" sentinel.
    uint32_t first_newline_pos{0};
    for (uint32_t token_idx = first_token_idx; token_idx < output_buffer.pos(); ++token_idx) {
        Token& token = output_buffer.get_mutable_token(token_idx);
        m_log_event_view->add_token(token.m_type_ids_ptr->at(0), &token);
        if (token.get_delimiter() == "\n" && 0 == first_newline_pos) {
            first_newline_pos = token_idx;
        }
    }

    if (false == output_buffer.has_timestamp() || 0 == first_newline_pos) {
        return;
    }
    // To be a multiline log there must be at least one token between the
    // newline token and the last token in the output buffer.
    if (first_newline_pos + 1 < output_buffer.pos()) {
        m_log_event_view->set_multiline(true);
    }
}
} // namespace log_surgeon
Loading

0 comments on commit 895f464

Please sign in to comment.