Skip to content

Commit

Permalink
Allow non-timestamped log messages to start with a variable. (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed authored Sep 22, 2023
1 parent 20ef752 commit e2f94cf
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 4 deletions.
4 changes: 2 additions & 2 deletions docs/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ delimiters: \t\r\n:,!;%
timestamp:\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3}){0,1}
timestamp:\[\d{8}\-\d{2}:\d{2}:\d{2}\]
int:\-{0,1}[0-9]+
double:\-{0,1}[0-9]+\.[0-9]+
float:\-{0,1}[0-9]+\.[0-9]+
// Custom variables
hex:[a-fA-F]+
Expand All @@ -109,7 +109,7 @@ equals:.*=.*[a-zA-Z0-9].*
* `timestamp` matches two different patterns:
* 2023-04-19 12:32:08.064
* [20230419-12:32:08]
* `int`, `double`, `hex`, `hasNumber`, and `equals` all match different user
* `int`, `float`, `hex`, `hasNumber`, and `equals` all match different user
defined variables.

## Regular Expression Syntax
Expand Down
2 changes: 1 addition & 1 deletion examples/schema.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ delimiters: \t\r\n:,!;%

// First set of variables
int:\-{0,1}[0-9]+
double:\-{0,1}[0-9]+\.[0-9]+
float:\-{0,1}[0-9]+\.[0-9]+

// Second set of variables
hex:[a-fA-F]+
Expand Down
1 change: 1 addition & 0 deletions src/log_surgeon/Constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ namespace utf8 {
// 0xFF are invalid UTF-8 code units
static unsigned char const cCharEOF = 0xFF;
static unsigned char const cCharErr = 0xFE;
// Sentinel code unit standing in for the start of the input. The lexer marks
// it as a delimiter and advances its DFA on it before lexing begins.
static unsigned char const cCharStartOfFile = 0xFD;
} // namespace utf8
} // namespace log_surgeon

Expand Down
7 changes: 7 additions & 0 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ class Lexer {
*/
auto reset() -> void;

/**
 * Set the lexer state as if it had already read a delimiter (used for
 * treating start of file as a delimiter). The DFA is advanced from its root
 * on utf8::cCharStartOfFile, and both m_start_pos and m_match_pos are set to
 * the current position of the input buffer's storage.
 * @param input_buffer containing the data to be lexed
 */
auto prepend_start_of_file_char(ParserInputBuffer& input_buffer) -> void;

/**
* Flip lexer states to match static buffer flipping.
* @param old_storage_size The previous buffer size used to calculate the
Expand Down
12 changes: 12 additions & 0 deletions src/log_surgeon/Lexer.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,17 @@ void Lexer<NFAStateType, DFAStateType>::reset() {
m_prev_state = nullptr;
}

// Primes the lexer as though a delimiter had already been consumed, so that
// the start of the file is treated as a delimiter.
template <typename NFAStateType, typename DFAStateType>
void Lexer<NFAStateType, DFAStateType>::prepend_start_of_file_char(
        ParserInputBuffer& input_buffer
) {
    // Both position markers start at the storage's current position.
    auto const current_pos = input_buffer.storage().pos();
    m_start_pos = current_pos;
    m_match_pos = current_pos;
    m_match_line = m_line;

    m_type_ids = nullptr;
    m_asked_for_more_data = true;

    // Step the DFA from its root on the start-of-file sentinel so the next
    // real character is lexed from the resulting state.
    m_prev_state = m_dfa->get_root()->next(utf8::cCharStartOfFile);
}

template <typename NFAStateType, typename DFAStateType>
void Lexer<NFAStateType, DFAStateType>::add_delimiters(std::vector<uint32_t> const& delimiters) {
assert(!delimiters.empty());
Expand All @@ -339,6 +350,7 @@ void Lexer<NFAStateType, DFAStateType>::add_delimiters(std::vector<uint32_t> con
for (uint32_t delimiter : delimiters) {
m_is_delimiter[delimiter] = true;
}
m_is_delimiter[utf8::cCharStartOfFile] = true;
}

template <typename NFAStateType, typename DFAStateType>
Expand Down
11 changes: 10 additions & 1 deletion src/log_surgeon/LogParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,15 @@ void LogParser::add_rules(SchemaAST const* schema_ast) {
unique_ptr<RegexAST<RegexNFAByteState>> first_timestamp_regex_ast(
rule->m_regex_ptr->clone()
);
add_rule("firstTimestamp", std::move(first_timestamp_regex_ast));
unique_ptr<RegexASTLiteral<RegexNFAByteState>> r1
= make_unique<RegexASTLiteral<RegexNFAByteState>>(utf8::cCharStartOfFile);
add_rule(
"firstTimestamp",
make_unique<RegexASTCat<RegexNFAByteState>>(
std::move(r1),
std::move(first_timestamp_regex_ast)
)
);
unique_ptr<RegexAST<RegexNFAByteState>> newline_timestamp_regex_ast(
rule->m_regex_ptr->clone()
);
Expand Down Expand Up @@ -143,6 +151,7 @@ void LogParser::add_rules(SchemaAST const* schema_ast) {
// Resets the input buffer and the lexer, then primes the lexer with the
// start-of-file character so the beginning of the input is handled as if it
// followed a delimiter.
auto LogParser::reset() -> void {
m_input_buffer.reset();
m_lexer.reset();
// Runs after m_lexer.reset(), which clears m_prev_state; this call sets it
// again to the state reached on the start-of-file character.
m_lexer.prepend_start_of_file_char(m_input_buffer);
}

// TODO: if the first text is a variable in the no timestamp case you lose the
Expand Down

0 comments on commit e2f94cf

Please sign in to comment.