Skip to content

Commit

Permalink
initial setup
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed committed Jun 12, 2023
1 parent bebcf98 commit e82fef0
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 1 deletion.
10 changes: 9 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ endif()
# Locate the log_surgeon package and report the version that was found.
# NOTE(review): this is an absolute path inside one developer's home directory; presumably a
# temporary local-development setting — confirm before relying on it on other machines.
set(log_surgeon_DIR "/home/sharaf/.local/lib/cmake/log_surgeon/")
find_package(log_surgeon REQUIRED)
if(log_surgeon_FOUND)
message(STATUS "Found spdlog ${log_surgeon_VERSION}")
message(STATUS "Found log_surgeon ${log_surgeon_VERSION}")
else()
# Abort configuration when the package could not be located.
message(FATAL_ERROR "Could not find static libraries for log_surgeon")
endif()
Expand Down Expand Up @@ -287,6 +287,8 @@ set(SOURCE_FILES_clp
src/string_utils.hpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -418,6 +420,8 @@ set(SOURCE_FILES_clg
src/string_utils.hpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -542,6 +546,8 @@ set(SOURCE_FILES_clo
src/StringReader.hpp
src/Thread.cpp
src/Thread.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -725,6 +731,8 @@ set(SOURCE_FILES_unitTest
src/string_utils.tpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down
1 change: 1 addition & 0 deletions components/core/src/TimeFormatFileParser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "TimeFormatFileParser.hpp
110 changes: 110 additions & 0 deletions components/core/src/TimeFormatFileParser.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#ifndef TIME_FORMAT_FILE_PARSER_HPP
#define TIME_FORMAT_FILE_PARSER_HPP

#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <log_surgeon/LALR1Parser.hpp>

// ASTs used in SchemaParser AST
class SchemaAST : public log_surgeon::ParserAST {
public:
// Constructor
SchemaAST() = default;

/// TODO: shouldn't this add delimiters instead of setting it?
auto set_delimiters(std::unique_ptr<ParserAST> delimiters_in) -> void {
m_delimiters = std::move(delimiters_in);
}

auto add_schema_var(std::unique_ptr<ParserAST> schema_var) -> void {
m_schema_vars.push_back(std::move(schema_var));
}

std::vector<std::unique_ptr<ParserAST>> m_schema_vars;
std::unique_ptr<ParserAST> m_delimiters;
std::string m_file_path;
};

class IdentifierAST : public ParserAST {
public:
// Constructor
explicit IdentifierAST(char character) { m_name.push_back(character); }

auto add_character(char character) -> void { m_name.push_back(character); }

std::string m_name;
};

class SchemaVarAST : public ParserAST {
public:
// Constructor
SchemaVarAST(std::string name,
std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>>
regex_ptr,
uint32_t line_num)
: m_line_num(line_num),
m_name(std::move(name)),
m_regex_ptr(std::move(regex_ptr)) {}

uint32_t m_line_num;
std::string m_name;
std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>> m_regex_ptr;
};

class DelimiterStringAST : public ParserAST {
public:
// Constructor
explicit DelimiterStringAST(uint32_t delimiter) { m_delimiters.push_back(delimiter); }

auto add_delimiter(uint32_t delimiter) -> void { m_delimiters.push_back(delimiter); }

std::vector<uint32_t> m_delimiters;
};

/**
 * LALR(1) parser built on log_surgeon::LALR1Parser with byte-based NFA and DFA states.
 *
 * NOTE(review): the member names and comments below still talk about "schema" parsing;
 * presumably they were carried over from a schema parser and will be adapted to time format
 * files -- TODO confirm.
 *
 * log_surgeon types are fully qualified because this class is declared at global scope, where
 * unqualified log_surgeon names are not visible.
 */
class TimeFormatFileParser
        : public log_surgeon::LALR1Parser<
                  log_surgeon::finite_automata::RegexNFAByteState,
                  log_surgeon::finite_automata::RegexDFAByteState> {
public:
    // Constructor
    TimeFormatFileParser();

    /**
     * A semantic rule that needs access to soft_reset()
     * @param m
     * @return std::unique_ptr<SchemaAST>
     */
    auto existing_schema_rule(log_surgeon::NonTerminal* m) -> std::unique_ptr<SchemaAST>;

    /**
     * Parse a user defined schema to generate a schema AST used for generating the log lexer
     * @param reader
     * @return std::unique_ptr<SchemaAST>
     */
    auto generate_schema_ast(log_surgeon::Reader& reader) -> std::unique_ptr<SchemaAST>;

    /**
     * Wrapper around generate_schema_ast()
     * @param schema_file_path
     * @return std::unique_ptr<SchemaAST>
     */
    static auto try_schema_file(std::string const& schema_file_path)
            -> std::unique_ptr<SchemaAST>;

private:
    /**
     * After lexing half of the buffer, reads into that half of the buffer and changes variables
     * accordingly
     * @param next_children_start
     */
    auto soft_reset(uint32_t& next_children_start) -> void;

    /**
     * Add all lexical rules needed for schema lexing
     */
    auto add_lexical_rules() -> void;

    /**
     * Add all productions needed for schema parsing
     */
    auto add_productions() -> void;
};

#endif //TIME_FORMAT_FILE_PARSER_HPP
59 changes: 59 additions & 0 deletions components/core/tests/test_time_format_files/default_formats.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# E.g. 2015-01-31T15:50:45.392
0:%Y-%m-%dT%H:%M:%S.%3
# E.g. 2015-01-31T15:50:45,392
0:%Y-%m-%dT%H:%M:%S,%3
# E.g. [2015-01-31T15:50:45
0:[%Y-%m-%dT%H:%M:%S
# E.g. [20170106-16:56:41]
0:[%Y%m%d-%H:%M:%S]
# E.g. 2015-01-31 15:50:45,392
0:%Y-%m-%d %H:%M:%S,%3
# E.g. 2015-01-31 15:50:45.392
0:%Y-%m-%d %H:%M:%S.%3
# E.g. [2015-01-31 15:50:45,085]
0:[%Y-%m-%d %H:%M:%S,%3]
# E.g. 2015-01-31 15:50:45
0:%Y-%m-%d %H:%M:%S
# E.g. Start-Date: 2015-01-31 15:50:45
1:%Y-%m-%d %H:%M:%S
# E.g. 2015/01/31 15:50:45
0:%Y/%m/%d %H:%M:%S
# E.g. 15/01/31 15:50:45
0:%y/%m/%d %H:%M:%S
# E.g. 150131 9:50:45
0:%y%m%d %k:%M:%S
# E.g. 01 Jan 2016 15:50:17,085
0:%d %b %Y %H:%M:%S,%3
# E.g. Jan 01, 2016 3:50:17 PM
0:%b %d, %Y %l:%M:%S %p
# E.g. January 31, 2015 15:50
0:%B %d, %Y %H:%M
# E.g. E [31/Jan/2015:15:50:45
1:[%d/%b/%Y:%H:%M:%S
# E.g. localhost - - [01/Jan/2016:15:50:17
# E.g. 192.168.4.5 - - [01/Jan/2016:15:50:17
3:[%d/%b/%Y:%H:%M:%S
# E.g. 192.168.4.5 - - [01/01/2016:15:50:17
3:[%d/%m/%Y:%H:%M:%S
# E.g. INFO [main] 2015-01-31 15:50:45,085
2:%Y-%m-%d %H:%M:%S,%3
# E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44
6:%Y-%m-%d %H:%M:%S
# E.g. update-alternatives 2015-01-31 15:50:45
1:%Y-%m-%d %H:%M:%S
# E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015
4:%a %b %e %H:%M:%S %Y
# E.g. <<<2016-11-10 03:02:29:936
0:<<<%Y-%m-%d %H:%M:%S:%3

# TODO These patterns are imprecise and will prevent searching by timestamp; but for now, it's no worse than not parsing a timestamp
# E.g. Jan 21 11:56:42
0:%b %d %H:%M:%S
# E.g. 01-21 11:56:42.392
0:%m-%d %H:%M:%S.%3
# E.g. 925123679
0:%rn
# E.g. 925123679ns
0:%rnns
# This is not allowed: 0:%rs:%3 E.g. 925:913
# This is not allowed: 0:%rs913 E.g. 925913

0 comments on commit e82fef0

Please sign in to comment.