-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bebcf98
commit e82fef0
Showing
4 changed files
with
179 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#include "TimeFormatFileParser.hpp" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#ifndef TIME_FORMAT_FILE_PARSER_HPP | ||
#define TIME_FORMAT_FILE_PARSER_HPP | ||
|
||
#include <log_surgeon/LALR1Parser.hpp> | ||
|
||
// ASTs used in SchemaParser AST | ||
class SchemaAST : public log_surgeon::ParserAST { | ||
public: | ||
// Constructor | ||
SchemaAST() = default; | ||
|
||
/// TODO: shouldn't this add delimiters instead of setting it? | ||
auto set_delimiters(std::unique_ptr<ParserAST> delimiters_in) -> void { | ||
m_delimiters = std::move(delimiters_in); | ||
} | ||
|
||
auto add_schema_var(std::unique_ptr<ParserAST> schema_var) -> void { | ||
m_schema_vars.push_back(std::move(schema_var)); | ||
} | ||
|
||
std::vector<std::unique_ptr<ParserAST>> m_schema_vars; | ||
std::unique_ptr<ParserAST> m_delimiters; | ||
std::string m_file_path; | ||
}; | ||
|
||
class IdentifierAST : public ParserAST { | ||
public: | ||
// Constructor | ||
explicit IdentifierAST(char character) { m_name.push_back(character); } | ||
|
||
auto add_character(char character) -> void { m_name.push_back(character); } | ||
|
||
std::string m_name; | ||
}; | ||
|
||
class SchemaVarAST : public ParserAST { | ||
public: | ||
// Constructor | ||
SchemaVarAST(std::string name, | ||
std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>> | ||
regex_ptr, | ||
uint32_t line_num) | ||
: m_line_num(line_num), | ||
m_name(std::move(name)), | ||
m_regex_ptr(std::move(regex_ptr)) {} | ||
|
||
uint32_t m_line_num; | ||
std::string m_name; | ||
std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>> m_regex_ptr; | ||
}; | ||
|
||
class DelimiterStringAST : public ParserAST { | ||
public: | ||
// Constructor | ||
explicit DelimiterStringAST(uint32_t delimiter) { m_delimiters.push_back(delimiter); } | ||
|
||
auto add_delimiter(uint32_t delimiter) -> void { m_delimiters.push_back(delimiter); } | ||
|
||
std::vector<uint32_t> m_delimiters; | ||
}; | ||
|
||
class TimeFormatFileParser : | ||
public log_surgeon::LALR1Parser<log_surgeon::finite_automata::RegexNFAByteState, | ||
log_surgeon::finite_automata::RegexDFAByteState> { | ||
public: | ||
// Constructor | ||
TimeFormatFileParser(); | ||
|
||
/** | ||
* A semantic rule that needs access to soft_reset() | ||
* @param m | ||
* @return std::unique_ptr<SchemaAST> | ||
*/ | ||
auto existing_schema_rule(NonTerminal* m) -> std::unique_ptr<SchemaAST>; | ||
|
||
/** | ||
* Parse a user defined schema to generate a schema AST used for generating the log lexer | ||
* @param reader | ||
* @return std::unique_ptr<SchemaAST> | ||
*/ | ||
auto generate_schema_ast(Reader& reader) -> std::unique_ptr<SchemaAST>; | ||
|
||
/** | ||
* Wrapper around generate_schema_ast() | ||
* @param schema_file_path | ||
* @return std::unique_ptr<SchemaAST> | ||
*/ | ||
static auto try_schema_file(std::string const& schema_file_path) | ||
-> std::unique_ptr<SchemaAST>; | ||
|
||
private: | ||
/** | ||
* After lexing half of the buffer, reads into that half of the buffer and changes variables | ||
* accordingly | ||
* @param next_children_start | ||
*/ | ||
auto soft_reset(uint32_t& next_children_start) -> void; | ||
|
||
/** | ||
* Add all lexical rules needed for schema lexing | ||
*/ | ||
auto add_lexical_rules() -> void; | ||
|
||
/** | ||
* Add all productions needed for schema parsing | ||
*/ | ||
auto add_productions() -> void; | ||
}; | ||
|
||
#endif //TIME_FORMAT_FILE_PARSER_HPP |
59 changes: 59 additions & 0 deletions
59
components/core/tests/test_time_format_files/default_formats.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# E.g. 2015-01-31T15:50:45.392 | ||
0:%Y-%m-%dT%H:%M:%S.%3 | ||
# E.g. 2015-01-31T15:50:45,392 | ||
0:%Y-%m-%dT%H:%M:%S,%3 | ||
# E.g. [2015-01-31T15:50:45 | ||
0:[%Y-%m-%dT%H:%M:%S | ||
# E.g. [20170106-16:56:41] | ||
0:[%Y%m%d-%H:%M:%S] | ||
# E.g. 2015-01-31 15:50:45,392 | ||
0:%Y-%m-%d %H:%M:%S,%3 | ||
# E.g. 2015-01-31 15:50:45.392 | ||
0:%Y-%m-%d %H:%M:%S.%3 | ||
# E.g. [2015-01-31 15:50:45,085] | ||
0:[%Y-%m-%d %H:%M:%S,%3] | ||
# E.g. 2015-01-31 15:50:45 | ||
0:%Y-%m-%d %H:%M:%S | ||
# E.g. Start-Date: 2015-01-31 15:50:45 | ||
1:%Y-%m-%d %H:%M:%S | ||
# E.g. 2015/01/31 15:50:45 | ||
0:%Y/%m/%d %H:%M:%S | ||
# E.g. 15/01/31 15:50:45 | ||
0:%y/%m/%d %H:%M:%S | ||
# E.g. 150131 9:50:45 | ||
0:%y%m%d %k:%M:%S | ||
# E.g. 01 Jan 2016 15:50:17,085 | ||
0:%d %b %Y %H:%M:%S,%3 | ||
# E.g. Jan 01, 2016 3:50:17 PM | ||
0:%b %d, %Y %l:%M:%S %p | ||
# E.g. January 31, 2015 15:50 | ||
0:%B %d, %Y %H:%M | ||
# E.g. E [31/Jan/2015:15:50:45 | ||
1:[%d/%b/%Y:%H:%M:%S | ||
# E.g. localhost - - [01/Jan/2016:15:50:17 | ||
# E.g. 192.168.4.5 - - [01/Jan/2016:15:50:17 | ||
3:[%d/%b/%Y:%H:%M:%S | ||
# E.g. 192.168.4.5 - - [01/01/2016:15:50:17 | ||
3:[%d/%m/%Y:%H:%M:%S | ||
# E.g. INFO [main] 2015-01-31 15:50:45,085 | ||
2:%Y-%m-%d %H:%M:%S,%3 | ||
# E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44 | ||
6:%Y-%m-%d %H:%M:%S | ||
# E.g. update-alternatives 2015-01-31 15:50:45 | ||
1:%Y-%m-%d %H:%M:%S | ||
# E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015 | ||
4:%a %b %e %H:%M:%S %Y | ||
# E.g. <<<2016-11-10 03:02:29:936 | ||
0:<<<%Y-%m-%d %H:%M:%S:%3 | ||
|
||
# TODO These patterns are imprecise and will prevent searching by timestamp; but for now, it's no worse than not parsing a timestamp | ||
# E.g. Jan 21 11:56:42 | ||
0:%b %d %H:%M:%S | ||
# E.g. 01-21 11:56:42.392 | ||
0:%m-%d %H:%M:%S.%3 | ||
# E.g. 925123679 | ||
0:%rn | ||
# E.g. 925123679ns | ||
0:%rnns | ||
# This is not allowed: 0:%rs:%3 E.g. 925:913 | ||
# This is not allowed: 0:%rs913 E.g. 925913 |