Skip to content

Commit

Permalink
- Initial timestamp format parser implementation
Browse files Browse the repository at this point in the history
- Current bug with multiple newlines before timestamps
  • Loading branch information
SharafMohamed committed Jun 26, 2023
1 parent e82fef0 commit d78922d
Show file tree
Hide file tree
Showing 16 changed files with 696 additions and 196 deletions.
16 changes: 8 additions & 8 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ set(SOURCE_FILES_clp
src/string_utils.hpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPatternsFileParser.cpp
src/TimestampPatternsFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -420,8 +420,8 @@ set(SOURCE_FILES_clg
src/string_utils.hpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPatternsFileParser.cpp
src/TimestampPatternsFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -546,8 +546,8 @@ set(SOURCE_FILES_clo
src/StringReader.hpp
src/Thread.cpp
src/Thread.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPatternsFileParser.cpp
src/TimestampPatternsFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down Expand Up @@ -731,8 +731,8 @@ set(SOURCE_FILES_unitTest
src/string_utils.tpp
src/StringReader.cpp
src/StringReader.hpp
src/TimeFormatFileParser.cpp
src/TimeFormatFileParser.hpp
src/TimestampPatternsFileParser.cpp
src/TimestampPatternsFileParser.hpp
src/TimestampPattern.cpp
src/TimestampPattern.hpp
src/TraceableException.cpp
Expand Down
1 change: 0 additions & 1 deletion components/core/src/TimeFormatFileParser.cpp

This file was deleted.

110 changes: 0 additions & 110 deletions components/core/src/TimeFormatFileParser.hpp

This file was deleted.

93 changes: 36 additions & 57 deletions components/core/src/TimestampPattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,63 +81,7 @@ static bool convert_string_to_number (const string& str, const size_t begin_ix,
* To initialize m_known_ts_patterns, we first create a vector of patterns then copy it to a dynamic array. This eases
* maintenance of the list and the cost doesn't matter since it is only done once when the program starts.
*/
void TimestampPattern::init () {
// First create vector of observed patterns so that it's easy to maintain
vector<TimestampPattern> patterns;
// E.g. 2015-01-31T15:50:45.392
patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3");
// E.g. 2015-01-31T15:50:45,392
patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3");
// E.g. [2015-01-31T15:50:45
patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S");
// E.g. [20170106-16:56:41]
patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]");
// E.g. 2015-01-31 15:50:45,392
patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3");
// E.g. 2015-01-31 15:50:45.392
patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3");
// E.g. [2015-01-31 15:50:45,085]
patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]");
// E.g. 2015-01-31 15:50:45
patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S");
// E.g. Start-Date: 2015-01-31 15:50:45
patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S");
// E.g. 2015/01/31 15:50:45
patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S");
// E.g. 15/01/31 15:50:45
patterns.emplace_back(0, "%y/%m/%d %H:%M:%S");
// E.g. 150131 9:50:45
patterns.emplace_back(0, "%y%m%d %k:%M:%S");
// E.g. 01 Jan 2016 15:50:17,085
patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3");
// E.g. Jan 01, 2016 3:50:17 PM
patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p");
// E.g. January 31, 2015 15:50
patterns.emplace_back(0, "%B %d, %Y %H:%M");
// E.g. E [31/Jan/2015:15:50:45
patterns.emplace_back(1, "[%d/%b/%Y:%H:%M:%S");
// E.g. localhost - - [01/Jan/2016:15:50:17
// E.g. 192.168.4.5 - - [01/Jan/2016:15:50:17
patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S");
// E.g. 192.168.4.5 - - [01/01/2016:15:50:17
patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S");
// E.g. INFO [main] 2015-01-31 15:50:45,085
patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3");
// E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44
patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S");
// E.g. update-alternatives 2015-01-31 15:50:45
patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S");
// E.g. ERROR: apport (pid 4557) Sun Jan 1 15:50:45 2015
patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y");
// E.g. <<<2016-11-10 03:02:29:936
patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3");

// TODO These patterns are imprecise and will prevent searching by timestamp; but for now, it's no worse than not parsing a timestamp
// E.g. Jan 21 11:56:42
patterns.emplace_back(0, "%b %d %H:%M:%S");
// E.g. 01-21 11:56:42.392
patterns.emplace_back(0, "%m-%d %H:%M:%S.%3");

void TimestampPattern::init (vector<TimestampPattern>& patterns) {
// Initialize m_known_ts_patterns with vector's contents
m_known_ts_patterns_len = patterns.size();
m_known_ts_patterns = std::make_unique<TimestampPattern[]>(m_known_ts_patterns_len);
Expand All @@ -160,6 +104,10 @@ const TimestampPattern* TimestampPattern::search_known_ts_patterns (const string
return nullptr;
}

// Returns a const reference to this pattern's stored regex string (m_regex); no copy is made
const string& TimestampPattern::get_regex () const {
return m_regex;
}

// Returns a const reference to this pattern's stored format string (m_format); no copy is made
const string& TimestampPattern::get_format () const {
return m_format;
}
Expand All @@ -175,6 +123,7 @@ bool TimestampPattern::is_empty () const {
void TimestampPattern::clear () {
    // Empty both strings and zero the space count; the three resets do not depend on one another
    m_regex.clear();
    m_format.clear();
    m_num_spaces_before_ts = 0;
}

bool TimestampPattern::parse_timestamp (const string& line, epochtime_t& timestamp, size_t& timestamp_begin_pos, size_t& timestamp_end_pos) const {
Expand Down Expand Up @@ -503,6 +452,32 @@ bool TimestampPattern::parse_timestamp (const string& line, epochtime_t& timesta
break;
}

case 'r': { // Relative timestamp in milliseconds
    // Count the run of ASCII digits that begins at line_ix
    int num_digits = 0;
    while (line_ix + num_digits < line_length) {
        const char current_char = line[line_ix + num_digits];
        if (current_char < '0' || current_char > '9') {
            break;
        }
        ++num_digits;
    }
    // At least one digit is required
    if (0 == num_digits) {
        return false;
    }

    // Convert the digit run; a failed conversion or a negative result fails the parse
    int value;
    if (!convert_string_to_number(line, line_ix, line_ix + num_digits, '0', value) || value < 0) {
        return false;
    }
    millisecond = value;
    line_ix += num_digits;

    break;
}

default:
return false;
}
Expand Down Expand Up @@ -698,6 +673,10 @@ void TimestampPattern::insert_formatted_timestamp (const epochtime_t timestamp,
append_padded_value(millisecond, '0', 3, new_msg);
break;

case 'r': // Relative timestamp
// Appends the decimal text of `timestamp` as produced by std::to_string; no padding is applied here
new_msg += std::to_string(timestamp);
break;

default: {
throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__);
}
Expand Down
15 changes: 13 additions & 2 deletions components/core/src/TimestampPattern.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Project headers
#include "Defs.h"
Expand Down Expand Up @@ -54,13 +55,16 @@ class TimestampPattern {

// Constructors
TimestampPattern () : m_num_spaces_before_ts(0) {}
TimestampPattern (uint8_t num_spaces_before_ts, const std::string& format) : m_num_spaces_before_ts(num_spaces_before_ts), m_format(format) {}
// Constructs a pattern from the space count preceding the timestamp, the format string, and the regex string;
// each argument is copied into the corresponding member
TimestampPattern (uint8_t num_spaces_before_ts, const std::string& format,
const std::string& regex) :
m_num_spaces_before_ts(num_spaces_before_ts), m_format(format), m_regex(regex) {}

// Methods
/**
* Static initializer for class. This must be called before using the class.
* @param patterns The timestamp patterns to copy into the class's list of known patterns
*/
static void init ();
static void init (std::vector<TimestampPattern>& patterns);

/**
* Searches for a known timestamp pattern which can parse the timestamp from the given line, and if found, parses the timestamp
Expand All @@ -73,6 +77,12 @@ class TimestampPattern {
static const TimestampPattern* search_known_ts_patterns (const std::string& line, epochtime_t& timestamp, size_t& timestamp_begin_pos,
size_t& timestamp_end_pos);

/**
* Gets the timestamp pattern's regex string
* @return See description
*/
const std::string& get_regex () const;

/**
* Gets the timestamp pattern's format string
* @return See description
Expand Down Expand Up @@ -136,6 +146,7 @@ class TimestampPattern {
// ^ ^ ^
uint8_t m_num_spaces_before_ts;
std::string m_format;
std::string m_regex;
};

#endif // TIMESTAMPPATTERN_HPP
Loading

0 comments on commit d78922d

Please sign in to comment.