From d78922d9777ada1973140577e3f03b78eb09bb02 Mon Sep 17 00:00:00 2001
From: SharafMohamed <chrismohamed91@gmail.com>
Date: Mon, 26 Jun 2023 16:24:50 -0400
Subject: [PATCH] - Initial timestamp format parser implementation - Current
 bug with multiple newlines before timestamps

---
 components/core/CMakeLists.txt                |  16 +-
 components/core/src/TimeFormatFileParser.cpp  |   1 -
 components/core/src/TimeFormatFileParser.hpp  | 110 ------
 components/core/src/TimestampPattern.cpp      |  93 ++---
 components/core/src/TimestampPattern.hpp      |  15 +-
 .../core/src/TimestampPatternsFileParser.cpp  | 353 ++++++++++++++++++
 .../core/src/TimestampPatternsFileParser.hpp  | 228 +++++++++++
 components/core/src/Utils.cpp                 |   8 +-
 .../core/src/clp/CommandLineArguments.cpp     |  19 +-
 .../core/src/clp/CommandLineArguments.hpp     |   2 +
 components/core/src/clp/FileCompressor.cpp    |   3 +-
 components/core/src/clp/run.cpp               |  19 +-
 .../src/streaming_archive/reader/File.cpp     |   2 +-
 .../src/streaming_archive/writer/Archive.cpp  |   3 +-
 components/core/tests/test_log_files/log.txt  |  10 +
 .../default_formats.txt                       |  10 +-
 16 files changed, 696 insertions(+), 196 deletions(-)
 delete mode 100644 components/core/src/TimeFormatFileParser.cpp
 delete mode 100644 components/core/src/TimeFormatFileParser.hpp
 create mode 100644 components/core/src/TimestampPatternsFileParser.cpp
 create mode 100644 components/core/src/TimestampPatternsFileParser.hpp

diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt
index 072a1a06d..20550c483 100644
--- a/components/core/CMakeLists.txt
+++ b/components/core/CMakeLists.txt
@@ -287,8 +287,8 @@ set(SOURCE_FILES_clp
         src/string_utils.hpp
         src/StringReader.cpp
         src/StringReader.hpp
-        src/TimeFormatFileParser.cpp
-        src/TimeFormatFileParser.hpp
+        src/TimestampPatternsFileParser.cpp
+        src/TimestampPatternsFileParser.hpp
         src/TimestampPattern.cpp
         src/TimestampPattern.hpp
         src/TraceableException.cpp
@@ -420,8 +420,8 @@ set(SOURCE_FILES_clg
         src/string_utils.hpp
         src/StringReader.cpp
         src/StringReader.hpp
-        src/TimeFormatFileParser.cpp
-        src/TimeFormatFileParser.hpp
+        src/TimestampPatternsFileParser.cpp
+        src/TimestampPatternsFileParser.hpp
         src/TimestampPattern.cpp
         src/TimestampPattern.hpp
         src/TraceableException.cpp
@@ -546,8 +546,8 @@ set(SOURCE_FILES_clo
         src/StringReader.hpp
         src/Thread.cpp
         src/Thread.hpp
-        src/TimeFormatFileParser.cpp
-        src/TimeFormatFileParser.hpp
+        src/TimestampPatternsFileParser.cpp
+        src/TimestampPatternsFileParser.hpp
         src/TimestampPattern.cpp
         src/TimestampPattern.hpp
         src/TraceableException.cpp
@@ -731,8 +731,8 @@ set(SOURCE_FILES_unitTest
         src/string_utils.tpp
         src/StringReader.cpp
         src/StringReader.hpp
-        src/TimeFormatFileParser.cpp
-        src/TimeFormatFileParser.hpp
+        src/TimestampPatternsFileParser.cpp
+        src/TimestampPatternsFileParser.hpp
         src/TimestampPattern.cpp
         src/TimestampPattern.hpp
         src/TraceableException.cpp
diff --git a/components/core/src/TimeFormatFileParser.cpp b/components/core/src/TimeFormatFileParser.cpp
deleted file mode 100644
index d886b7021..000000000
--- a/components/core/src/TimeFormatFileParser.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "TimeFormatFileParser.hpp
\ No newline at end of file
diff --git a/components/core/src/TimeFormatFileParser.hpp b/components/core/src/TimeFormatFileParser.hpp
deleted file mode 100644
index d4f09daa4..000000000
--- a/components/core/src/TimeFormatFileParser.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef TIME_FORMAT_FILE_PARSER_HPP
-#define TIME_FORMAT_FILE_PARSER_HPP
-
-#include <log_surgeon/LALR1Parser.hpp>
-
-// ASTs used in SchemaParser AST
-class SchemaAST : public log_surgeon::ParserAST {
-public:
-    // Constructor
-    SchemaAST() = default;
-
-    /// TODO: shouldn't this add delimiters instead of setting it?
-    auto set_delimiters(std::unique_ptr<ParserAST> delimiters_in) -> void {
-        m_delimiters = std::move(delimiters_in);
-    }
-
-    auto add_schema_var(std::unique_ptr<ParserAST> schema_var) -> void {
-        m_schema_vars.push_back(std::move(schema_var));
-    }
-
-    std::vector<std::unique_ptr<ParserAST>> m_schema_vars;
-    std::unique_ptr<ParserAST> m_delimiters;
-    std::string m_file_path;
-};
-
-class IdentifierAST : public ParserAST {
-public:
-    // Constructor
-    explicit IdentifierAST(char character) { m_name.push_back(character); }
-
-    auto add_character(char character) -> void { m_name.push_back(character); }
-
-    std::string m_name;
-};
-
-class SchemaVarAST : public ParserAST {
-public:
-    // Constructor
-    SchemaVarAST(std::string name,
-                 std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>>
-    regex_ptr,
-    uint32_t line_num)
-    : m_line_num(line_num),
-    m_name(std::move(name)),
-    m_regex_ptr(std::move(regex_ptr)) {}
-
-    uint32_t m_line_num;
-    std::string m_name;
-    std::unique_ptr<finite_automata::RegexAST<finite_automata::RegexNFAByteState>> m_regex_ptr;
-};
-
-class DelimiterStringAST : public ParserAST {
-public:
-    // Constructor
-    explicit DelimiterStringAST(uint32_t delimiter) { m_delimiters.push_back(delimiter); }
-
-    auto add_delimiter(uint32_t delimiter) -> void { m_delimiters.push_back(delimiter); }
-
-    std::vector<uint32_t> m_delimiters;
-};
-
-class TimeFormatFileParser : 
-    public log_surgeon::LALR1Parser<log_surgeon::finite_automata::RegexNFAByteState, 
-                                    log_surgeon::finite_automata::RegexDFAByteState> {
-public:
-    // Constructor
-    TimeFormatFileParser();
-
-    /**
-     * A semantic rule that needs access to soft_reset()
-     * @param m
-     * @return std::unique_ptr<SchemaAST>
-     */
-    auto existing_schema_rule(NonTerminal* m) -> std::unique_ptr<SchemaAST>;
-
-    /**
-     * Parse a user defined schema to generate a schema AST used for generating the log lexer
-     * @param reader
-     * @return std::unique_ptr<SchemaAST>
-     */
-    auto generate_schema_ast(Reader& reader) -> std::unique_ptr<SchemaAST>;
-
-    /**
-     * Wrapper around generate_schema_ast()
-     * @param schema_file_path
-     * @return std::unique_ptr<SchemaAST>
-     */
-    static auto try_schema_file(std::string const& schema_file_path)
-    -> std::unique_ptr<SchemaAST>;
-
-private:
-    /**
-     * After lexing half of the buffer, reads into that half of the buffer and changes variables
-     * accordingly
-     * @param next_children_start
-     */
-    auto soft_reset(uint32_t& next_children_start) -> void;
-
-    /**
-     * Add all lexical rules needed for schema lexing
-     */
-    auto add_lexical_rules() -> void;
-
-    /**
-     * Add all productions needed for schema parsing
-     */
-    auto add_productions() -> void;
-};
-
-#endif //TIME_FORMAT_FILE_PARSER_HPP
diff --git a/components/core/src/TimestampPattern.cpp b/components/core/src/TimestampPattern.cpp
index c07811660..f081e84f6 100644
--- a/components/core/src/TimestampPattern.cpp
+++ b/components/core/src/TimestampPattern.cpp
@@ -81,63 +81,7 @@ static bool convert_string_to_number (const string& str, const size_t begin_ix,
  * To initialize m_known_ts_patterns, we first create a vector of patterns then copy it to a dynamic array. This eases
  * maintenance of the list and the cost doesn't matter since it is only done once when the program starts.
  */
-void TimestampPattern::init () {
-    // First create vector of observed patterns so that it's easy to maintain
-    vector<TimestampPattern> patterns;
-    // E.g. 2015-01-31T15:50:45.392
-    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3");
-    // E.g. 2015-01-31T15:50:45,392
-    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3");
-    // E.g. [2015-01-31T15:50:45
-    patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S");
-    // E.g. [20170106-16:56:41]
-    patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]");
-    // E.g. 2015-01-31 15:50:45,392
-    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3");
-    // E.g. 2015-01-31 15:50:45.392
-    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3");
-    // E.g. [2015-01-31 15:50:45,085]
-    patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]");
-    // E.g. 2015-01-31 15:50:45
-    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S");
-    // E.g. Start-Date: 2015-01-31  15:50:45
-    patterns.emplace_back(1, "%Y-%m-%d  %H:%M:%S");
-    // E.g. 2015/01/31 15:50:45
-    patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S");
-    // E.g. 15/01/31 15:50:45
-    patterns.emplace_back(0, "%y/%m/%d %H:%M:%S");
-    // E.g. 150131  9:50:45
-    patterns.emplace_back(0, "%y%m%d %k:%M:%S");
-    // E.g. 01 Jan 2016 15:50:17,085
-    patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3");
-    // E.g. Jan 01, 2016 3:50:17 PM
-    patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p");
-    // E.g. January 31, 2015 15:50
-    patterns.emplace_back(0, "%B %d, %Y %H:%M");
-    // E.g. E [31/Jan/2015:15:50:45
-    patterns.emplace_back(1, "[%d/%b/%Y:%H:%M:%S");
-    // E.g. localhost - - [01/Jan/2016:15:50:17
-    // E.g. 192.168.4.5 - - [01/Jan/2016:15:50:17
-    patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S");
-    // E.g. 192.168.4.5 - - [01/01/2016:15:50:17
-    patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S");
-    // E.g. INFO [main] 2015-01-31 15:50:45,085
-    patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3");
-    // E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44
-    patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S");
-    // E.g. update-alternatives 2015-01-31 15:50:45
-    patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S");
-    // E.g. ERROR: apport (pid 4557) Sun Jan  1 15:50:45 2015
-    patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y");
-    // E.g. <<<2016-11-10 03:02:29:936
-    patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3");
-
-    // TODO These patterns are imprecise and will prevent searching by timestamp; but for now, it's no worse than not parsing a timestamp
-    // E.g. Jan 21 11:56:42
-    patterns.emplace_back(0, "%b %d %H:%M:%S");
-    // E.g. 01-21 11:56:42.392
-    patterns.emplace_back(0, "%m-%d %H:%M:%S.%3");
-
+void TimestampPattern::init (vector<TimestampPattern>& patterns) {
     // Initialize m_known_ts_patterns with vector's contents
     m_known_ts_patterns_len = patterns.size();
     m_known_ts_patterns = std::make_unique<TimestampPattern[]>(m_known_ts_patterns_len);
@@ -160,6 +104,10 @@ const TimestampPattern* TimestampPattern::search_known_ts_patterns (const string
     return nullptr;
 }
 
+const string& TimestampPattern::get_regex () const {
+    return m_regex;  // hands out a reference to the stored member, not a copy
+}
+
 const string& TimestampPattern::get_format () const {
     return m_format;
 }
@@ -175,6 +123,7 @@ bool TimestampPattern::is_empty () const {
 void TimestampPattern::clear () {
     m_num_spaces_before_ts = 0;
     m_format.clear();
+    m_regex.clear();
 }
 
 bool TimestampPattern::parse_timestamp (const string& line, epochtime_t& timestamp, size_t& timestamp_begin_pos, size_t& timestamp_end_pos) const {
@@ -503,6 +452,32 @@ bool TimestampPattern::parse_timestamp (const string& line, epochtime_t& timesta
                     break;
                 }
 
+                case 'r': { // Relative timestamp in millisecond
+                    int cFieldLength = 0;
+                    while(line_ix + cFieldLength < line_length) {
+                        if('0' <= line[line_ix + cFieldLength] && line[line_ix + cFieldLength] <= 
+                           '9') 
+                        {
+                            cFieldLength++;
+                        } else {
+                            break;
+                        }
+                    }
+                    if(cFieldLength == 0) {
+                        return false;
+                    }
+                    int value;
+                    if (!convert_string_to_number(line, line_ix, line_ix + cFieldLength, '0', 
+                                                  value) || value < 0) 
+                    {
+                        return false;
+                    }
+                    millisecond = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+                
                 default:
                     return false;
             }
@@ -698,6 +673,10 @@ void TimestampPattern::insert_formatted_timestamp (const epochtime_t timestamp,
                     append_padded_value(millisecond, '0', 3, new_msg);
                     break;
 
+                case 'r': // Relative timestamp
+                    new_msg += std::to_string(timestamp);
+                    break;
+                    
                 default: {
                     throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__);
                 }
diff --git a/components/core/src/TimestampPattern.hpp b/components/core/src/TimestampPattern.hpp
index f7653c3f6..b6899eba1 100644
--- a/components/core/src/TimestampPattern.hpp
+++ b/components/core/src/TimestampPattern.hpp
@@ -5,6 +5,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <vector>
 
 // Project headers
 #include "Defs.h"
@@ -54,13 +55,16 @@ class TimestampPattern {
 
     // Constructors
     TimestampPattern () : m_num_spaces_before_ts(0) {}
-    TimestampPattern (uint8_t num_spaces_before_ts, const std::string& format) : m_num_spaces_before_ts(num_spaces_before_ts), m_format(format) {}
+    TimestampPattern (uint8_t num_spaces_before_ts, const std::string& format, 
+                      const std::string& regex) :  // each argument is copied into its member
+            m_num_spaces_before_ts(num_spaces_before_ts), m_format(format), m_regex(regex) {}
 
     // Methods
     /**
      * Static initializer for class. This must be called before using the class.
+     * @param patterns 
      */
-    static void init ();
+    static void init (std::vector<TimestampPattern>& patterns);
 
     /**
      * Searches for a known timestamp pattern which can parse the timestamp from the given line, and if found, parses the timestamp
@@ -73,6 +77,12 @@ class TimestampPattern {
     static const TimestampPattern* search_known_ts_patterns (const std::string& line, epochtime_t& timestamp, size_t& timestamp_begin_pos,
                                                              size_t& timestamp_end_pos);
 
+    /**
+     * Gets the timestamp pattern's regex string
+     * @return See description
+     */
+    const std::string& get_regex () const;
+    
     /**
      * Gets the timestamp pattern's format string
      * @return See description
@@ -136,6 +146,7 @@ class TimestampPattern {
     //                   ^ ^ ^
     uint8_t m_num_spaces_before_ts;
     std::string m_format;
+    std::string m_regex;
 };
 
 #endif // TIMESTAMPPATTERN_HPP
diff --git a/components/core/src/TimestampPatternsFileParser.cpp b/components/core/src/TimestampPatternsFileParser.cpp
new file mode 100644
index 000000000..8a645ed5e
--- /dev/null
+++ b/components/core/src/TimestampPatternsFileParser.cpp
@@ -0,0 +1,353 @@
+#include "TimestampPatternsFileParser.hpp"
+
+// C++ libraries
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+
+// Log Surgeon
+#include <log_surgeon/Constants.hpp>
+#include <log_surgeon/FileReader.hpp>
+#include <log_surgeon/LALR1Parser.hpp>
+#include <log_surgeon/Lexer.hpp>
+#include <log_surgeon/finite_automata/RegexAST.hpp>
+#include <log_surgeon/utils.hpp>
+
+using FileReader = log_surgeon::FileReader;
+using NonTerminal = log_surgeon::NonTerminal;
+using ParserAST = log_surgeon::ParserAST;
+template <typename T> using ParserValue = log_surgeon::ParserValue<T>;
+using Reader = log_surgeon::Reader;
+using RegexASTByte =
+        log_surgeon::finite_automata::RegexAST<log_surgeon::finite_automata::RegexNFAByteState>;
+using RegexASTGroupByte = log_surgeon::finite_automata::RegexASTGroup<
+        log_surgeon::finite_automata::RegexNFAByteState>;
+using RegexASTCatByte =
+        log_surgeon::finite_automata::RegexASTCat<log_surgeon::finite_automata::RegexNFAByteState>;
+using Token = log_surgeon::Token;
+
+using std::make_unique;
+using std::string;
+using std::unique_ptr;
+
+// Registers the lexical rules and productions, then calls generate().
+TimestampPatternsFileParser::TimestampPatternsFileParser()
+        : m_timestamp_patterns(),
+          m_current_timestamp_num_spaces(""), m_current_timestamp_format(""),
+          m_current_timestamp_regex("") {
+    add_lexical_rules();
+    add_productions();
+    generate();
+}
+
+auto TimestampPatternsFileParser::generate_timestamp_patterns(Reader& reader) -> void {
+    parse(reader);  // patterns are collected by timestamp_pattern_rule (presumably run by parse)
+}
+
+// Parses the file at schema_file_path into timestamp patterns; throws if it cannot be opened.
+auto TimestampPatternsFileParser::try_timestamp_patterns_file(string const& schema_file_path) 
+-> std::vector<TimestampPattern> {
+    FileReader file_reader;
+    log_surgeon::ErrorCode error_code = file_reader.try_open(schema_file_path);
+    if (log_surgeon::ErrorCode::Success != error_code) {
+        if (log_surgeon::ErrorCode::Errno == error_code) {
+            throw std::runtime_error(
+                    strfmt("Failed to read '%s', errno=%d", schema_file_path.c_str(), errno));
+        }
+        int code{static_cast<std::underlying_type_t<log_surgeon::ErrorCode>>(error_code)};
+        throw std::runtime_error(
+                strfmt("Failed to read '%s', error_code=%d", schema_file_path.c_str(), code));
+    }
+    TimestampPatternsFileParser parser;
+    Reader reader{[&](char* buf, size_t count, size_t& read_to) -> log_surgeon::ErrorCode {
+        file_reader.read(buf, count, read_to);
+        // A zero-byte read is reported to the parser as end of file.
+        return 0 == read_to ? log_surgeon::ErrorCode::EndOfFile : log_surgeon::ErrorCode::Success;
+    }};
+    parser.generate_timestamp_patterns(reader);
+    file_reader.close();
+    // parser is a local about to be destroyed, so move the patterns out rather than copy them.
+    return std::move(parser.m_timestamp_patterns);
+}
+
+auto TimestampPatternsFileParser::timestamp_pattern_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
+    // A uint8_t loop counter can never reach a count above 255 (it wraps), so the count is parsed
+    // once and rejected up front when it does not fit the uint8_t stored by TimestampPattern.
+    unsigned long const num_spaces = std::stoul(m_current_timestamp_num_spaces);
+    if (num_spaces > 255) { throw std::runtime_error("num spaces before timestamp exceeds 255"); }
+    for (size_t i = 0; i < num_spaces; ++i) { m_current_timestamp_regex.insert(0, "[^ ]+ "); }
+    m_timestamp_patterns.emplace_back(static_cast<uint8_t>(num_spaces), m_current_timestamp_format,
+                                      m_current_timestamp_regex);
+    m_current_timestamp_num_spaces.clear();
+    m_current_timestamp_format.clear();
+    m_current_timestamp_regex.clear();
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::existing_num_spaces_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
+    m_current_timestamp_num_spaces.append(m->token_cast(1)->to_string());  // child 1: newest digit
+    return nullptr;
+}
+
+
+auto TimestampPatternsFileParser::new_num_spaces_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
+    m_current_timestamp_num_spaces.append(m->token_cast(0)->to_string());  // child 0: first digit
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_r_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%r");
+    m_current_timestamp_regex.append("\\d+");  // one or more digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_Y_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%Y");
+    m_current_timestamp_regex.append("\\d{4}");  // exactly four digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_y_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%y");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_m_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%m");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_b_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format += "%b";
+    m_current_timestamp_regex += "[A-Za-z]{3}";  // exactly three ASCII letters
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_B_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%B");
+    m_current_timestamp_regex.append("[A-Za-z]{3,9}");  // three to nine ASCII letters
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_d_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%d");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_e_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%e");
+    m_current_timestamp_regex.append("\\d{1,2}");  // one or two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_a_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%a");
+    m_current_timestamp_regex.append("[A-Za-z]{3}");  // exactly three ASCII letters
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_H_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%H");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_k_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%k");
+    m_current_timestamp_regex.append("\\d{1,2}");  // one or two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_l_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%l");
+    m_current_timestamp_regex.append("\\d{1,2}");  // one or two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_p_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%p");
+    m_current_timestamp_regex.append("[A-Za-z]{2}");  // exactly two ASCII letters
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_M_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%M");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_S_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%S");
+    m_current_timestamp_regex.append("\\d{2}");  // exactly two digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::percent_3_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%3");
+    m_current_timestamp_regex.append("\\d{3}");  // exactly three digits
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::cancel_literal_rule(NonTerminal* /* m */) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append("%");  // NOTE(review): lone '%' - confirm it parses as literal
+    m_current_timestamp_regex.append("%");
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::literal_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append(m->token_cast(0)->to_string());
+    m_current_timestamp_regex.append(m->token_cast(0)->to_string());  // copied without escaping
+    return nullptr;
+}
+
+auto TimestampPatternsFileParser::special_literal_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
+    m_current_timestamp_format.append(m->token_cast(0)->to_string());
+    m_current_timestamp_regex.append("\\").append(m->token_cast(0)->to_string());  // '\'-escaped
+    return nullptr;
+}
+
+void TimestampPatternsFileParser::add_lexical_rules() {  // registers the tokens the grammar uses
+    add_token_group("Digit", make_unique<RegexASTGroupByte>('0', '9'));  // range '0'..'9'
+    add_token("Colon", ':');
+    add_token("Percent", '%');
+    add_token("Y", 'Y');
+    add_token("y", 'y');
+    add_token("m", 'm');
+    add_token("b", 'b');
+    add_token("B", 'B');
+    add_token("d", 'd');
+    add_token("e", 'e');
+    add_token("a", 'a');
+    add_token("H", 'H');
+    add_token("k", 'k');
+    add_token("l", 'l');
+    add_token("p", 'p');
+    add_token("M", 'M');
+    add_token("S", 'S');
+    add_token("3", '3');  // NOTE(review): '3' also lies inside the Digit range registered above
+    add_token("r", 'r');
+    add_token("NewLine", '\n');
+    add_token("CarriageReturn", '\r');
+    // special characters that must be led by a '\' in regex to be literals 
+    // (refer to productions in SchemaParser using regex_cancel_literal_rule)
+    std::vector<uint32_t> special_characters;
+    special_characters.push_back('(');
+    special_characters.push_back(')');
+    special_characters.push_back('*');
+    special_characters.push_back('+');
+    special_characters.push_back('-');
+    special_characters.push_back('.');
+    special_characters.push_back('[');
+    special_characters.push_back('\\');
+    special_characters.push_back(']');
+    special_characters.push_back('^');
+    special_characters.push_back('{');
+    special_characters.push_back('|');
+    special_characters.push_back('}');
+    unique_ptr<RegexASTGroupByte> special_characters_group
+        = make_unique<RegexASTGroupByte>(special_characters);
+    add_token_group("SpecialCharacters", std::move(special_characters_group));
+    // default constructs to an m_negate group
+    unique_ptr<RegexASTGroupByte> literal_characters = make_unique<RegexASTGroupByte>();
+    literal_characters->add_literal('\r');
+    literal_characters->add_literal('\n');
+    literal_characters->add_literal('%');
+    for(uint32_t i : special_characters) {  // also list every special character in the group
+        literal_characters->add_literal(i);
+    }
+    add_token_group("LiteralCharacter", std::move(literal_characters));
+    // everything below is for comments
+    add_token("Hash", '#');
+    // default constructs to an m_negate group
+    unique_ptr<RegexASTGroupByte> comment_characters = make_unique<RegexASTGroupByte>();
+    comment_characters->add_literal('\r');
+    comment_characters->add_literal('\n');
+    add_token_group("CommentCharacter", std::move(comment_characters));
+}
+
+void TimestampPatternsFileParser::add_productions() {  // registers the patterns-file grammar
+    add_production("TimestampPatterns", {"Comment"}, nullptr);
+    add_production("TimestampPatterns", {"TimestampPattern"}, nullptr);
+    add_production("TimestampPatterns", {"TimestampPatterns", "PortableNewLine"},nullptr);
+    add_production("TimestampPatterns", {"TimestampPatterns", "PortableNewLine", "Comment"},
+                   nullptr);
+    add_production("TimestampPatterns",
+                   {"TimestampPatterns", "PortableNewLine", "TimestampPattern"}, nullptr);
+    add_production("PortableNewLine", {"CarriageReturn", "NewLine"}, nullptr);  // "\r\n"
+    add_production("PortableNewLine", {"NewLine"}, nullptr);  // "\n"
+    add_production("Comment", {"Hash", "CommentString"}, nullptr);  // '#' plus at least one char
+    add_production("CommentString", {"CommentString", "CommentCharacter"}, nullptr);
+    add_production("CommentString", {"CommentCharacter"}, nullptr);
+    add_production("TimestampPattern", {"NumSpaces", "Colon", "TimeFormat"},  // <digits>:<format>
+                   std::bind(&TimestampPatternsFileParser::timestamp_pattern_rule, this,
+                             std::placeholders::_1));
+    add_production("TimeFormat", {"TimeFormat", "Literal"}, nullptr);
+    add_production("TimeFormat", {"Literal"}, nullptr);
+    add_production("NumSpaces", {"NumSpaces", "Digit"},
+                   std::bind(&TimestampPatternsFileParser::existing_num_spaces_rule, this,
+                             std::placeholders::_1));
+    add_production("NumSpaces", {"Digit"},
+                   std::bind(&TimestampPatternsFileParser::new_num_spaces_rule, this,
+                             std::placeholders::_1));
+    /// TODO: add relative restrictions into lexer
+    add_production("Literal", {"Percent", "r"},
+                   std::bind(&TimestampPatternsFileParser::percent_r_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "Y"},
+                   std::bind(&TimestampPatternsFileParser::percent_Y_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "y"},
+                   std::bind(&TimestampPatternsFileParser::percent_y_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "m"},
+                   std::bind(&TimestampPatternsFileParser::percent_m_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "b"},
+                   std::bind(&TimestampPatternsFileParser::percent_b_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "B"},
+                   std::bind(&TimestampPatternsFileParser::percent_B_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "d"},
+                   std::bind(&TimestampPatternsFileParser::percent_d_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "e"},
+                   std::bind(&TimestampPatternsFileParser::percent_e_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "a"},
+                   std::bind(&TimestampPatternsFileParser::percent_a_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "H"},
+                   std::bind(&TimestampPatternsFileParser::percent_H_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "k"},
+                   std::bind(&TimestampPatternsFileParser::percent_k_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "l"},
+                   std::bind(&TimestampPatternsFileParser::percent_l_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "p"},
+                   std::bind(&TimestampPatternsFileParser::percent_p_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "M"},
+                   std::bind(&TimestampPatternsFileParser::percent_M_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "S"},
+                   std::bind(&TimestampPatternsFileParser::percent_S_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "3"},
+                   std::bind(&TimestampPatternsFileParser::percent_3_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"Percent", "Percent"},  // "%%"
+                   std::bind(&TimestampPatternsFileParser::cancel_literal_rule, this,
+                             std::placeholders::_1));
+    add_production("Literal", {"LiteralCharacter"},
+                   std::bind(&TimestampPatternsFileParser::literal_rule, this, 
+                             std::placeholders::_1));
+    add_production("Literal", {"SpecialCharacters"},
+                   std::bind(&TimestampPatternsFileParser::special_literal_rule, this,
+                             std::placeholders::_1));
+}
diff --git a/components/core/src/TimestampPatternsFileParser.hpp b/components/core/src/TimestampPatternsFileParser.hpp
new file mode 100644
index 000000000..dfb72a421
--- /dev/null
+++ b/components/core/src/TimestampPatternsFileParser.hpp
@@ -0,0 +1,228 @@
+#ifndef TIMESTAMP_PATTERNS_FILE_PARSER_HPP
+#define TIMESTAMP_PATTERNS_FILE_PARSER_HPP
+
+// Log Surgeon
+#include <log_surgeon/LALR1Parser.hpp>
+#include <log_surgeon/finite_automata/RegexAST.hpp>
+
+// Project headers
+#include "TimestampPattern.hpp"
+
+class TimestampPatternsFileParser : 
+    public log_surgeon::LALR1Parser<log_surgeon::finite_automata::RegexNFAByteState, 
+                                    log_surgeon::finite_automata::RegexDFAByteState> {
+public:
+    // Constructor
+    TimestampPatternsFileParser();
+    
+    /**
+     * Adds the current timestamp pattern to m_timestamp_patterns and resets the current pattern
+     * @param m unused
+     * @return nullptr
+     */
+    auto timestamp_pattern_rule(log_surgeon::NonTerminal* /* m */)
+    -> std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * Begins building the digit string for the number of spaces in the timestamp
+     * @param m NOTE(review): undocumented; presumably carries the lexed digit - confirm
+     * @return nullptr
+     */
+    auto new_num_spaces_rule(log_surgeon::NonTerminal* m) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+
+    /**
+     * Extends the existing digit string for the number of spaces in the timestamp
+     * @param m NOTE(review): undocumented; presumably carries the lexed digit - confirm
+     * @return nullptr
+     */
+    auto existing_num_spaces_rule(log_surgeon::NonTerminal* m) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+
+    /**
+     * If "%r" is lexed, adds "%r" to the time format string and 1 or more digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_r_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%Y" is lexed, adds "%Y" to the time format string and 4 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_Y_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%y" is lexed, adds "%y" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_y_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%m" is lexed, adds "%m" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_m_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+
+    /**
+     * If "%b" is lexed, adds "%b" to the time format string and 3 characters to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_b_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%B" is lexed, adds "%B" to the time format string and 3-9 characters to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_B_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%d" is lexed, adds "%d" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_d_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%e" is lexed, adds "%e" to the time format string and 1-2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_e_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%a" is lexed, adds "%a" to the time format string and 3 characters to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_a_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%H" is lexed, adds "%H" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_H_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%k" is lexed, adds "%k" to the time format string and 1-2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_k_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%l" is lexed, adds "%l" to the time format string and 1-2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_l_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%p" is lexed, adds "%p" to the time format string and AM/PM to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_p_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%M" is lexed, adds "%M" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_M_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%S" is lexed, adds "%S" to the time format string and 2 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_S_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%3" is lexed, adds "%3" to the time format string and 3 digits to the regex string
+     * @param m unused
+     * @return nullptr
+     */
+    auto percent_3_rule(log_surgeon::NonTerminal* /* m */) ->
+    std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * If "%%" is lexed, adds "%" to the time format and regex strings
+     * @param m unused
+     * @return nullptr
+     */
+    auto cancel_literal_rule(log_surgeon::NonTerminal* /* m */) 
+    -> std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * Adds a lexed literal to the time format and regex strings
+     * @param m contains lexed character
+     * @return nullptr
+     */
+    auto literal_rule(log_surgeon::NonTerminal* m) -> std::unique_ptr<log_surgeon::ParserAST>;
+
+    /**
+     * Adds a lexed special literal to the time format and regex strings
+     * (e.g. '-' in regex is "\-")
+     * @param m contains lexed character
+     * @return nullptr
+     */
+    auto special_literal_rule(log_surgeon::NonTerminal* m) -> std::unique_ptr<log_surgeon::ParserAST>;
+    
+    /**
+     * Parses the user-defined timestamp patterns file in reader and stores them in m_timestamp_patterns
+     * @param reader
+     */
+    auto generate_timestamp_patterns(log_surgeon::Reader& reader) -> void;
+
+    /**
+     * Wrapper around generate_timestamp_patterns()
+     * @param file_path path of the timestamp patterns file
+     * @return a vector containing the parsed timestamp patterns
+     */
+    static auto try_timestamp_patterns_file(std::string const& file_path)
+    -> std::vector<TimestampPattern>;
+
+private:
+    /**
+     * Adds all lexical rules needed for timestamp patterns lexing
+     */
+    auto add_lexical_rules() -> void;
+
+    /**
+     * Adds all productions needed for timestamp patterns parsing
+     */
+    auto add_productions() -> void;
+    
+    // contains all timestamp patterns parsed
+    std::vector<TimestampPattern> m_timestamp_patterns;
+    // contains num_spaces of timestamp pattern currently being parsed
+    std::string m_current_timestamp_num_spaces;
+    // contains time format of timestamp pattern currently being parsed
+    std::string m_current_timestamp_format;
+    // contains regex of timestamp pattern currently being parsed
+    std::string m_current_timestamp_regex;
+};
+
+#endif //TIMESTAMP_PATTERNS_FILE_PARSER_HPP
diff --git a/components/core/src/Utils.cpp b/components/core/src/Utils.cpp
index 520a3b64f..26bff70d2 100644
--- a/components/core/src/Utils.cpp
+++ b/components/core/src/Utils.cpp
@@ -243,21 +243,21 @@ void load_lexer_from_file (std::string schema_file_path,
     lexer.m_symbol_id[log_surgeon::cTokenEnd] = (int) log_surgeon::SymbolID::TokenEndID;
     lexer.m_symbol_id[log_surgeon::cTokenUncaughtString] =
             (int) log_surgeon::SymbolID::TokenUncaughtStringID;
-    lexer.m_symbol_id[log_surgeon::cTokenInt] = (int) log_surgeon::SymbolID::TokenIntId;
-    lexer.m_symbol_id[log_surgeon::cTokenFloat] = (int) log_surgeon::SymbolID::TokenFloatId;
     lexer.m_symbol_id[log_surgeon::cTokenFirstTimestamp] = (int) log_surgeon::SymbolID::TokenFirstTimestampId;
     lexer.m_symbol_id[log_surgeon::cTokenNewlineTimestamp] = (int) log_surgeon::SymbolID::TokenNewlineTimestampId;
+    lexer.m_symbol_id[log_surgeon::cTokenInt] = (int) log_surgeon::SymbolID::TokenIntId;
+    lexer.m_symbol_id[log_surgeon::cTokenFloat] = (int) log_surgeon::SymbolID::TokenFloatId;
     lexer.m_symbol_id[log_surgeon::cTokenNewline] = (int) log_surgeon::SymbolID::TokenNewlineId;
 
     lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenEndID] = log_surgeon::cTokenEnd;
     lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenUncaughtStringID] =
             log_surgeon::cTokenUncaughtString;
-    lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenIntId] = log_surgeon::cTokenInt;
-    lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenFloatId] = log_surgeon::cTokenFloat;
     lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenFirstTimestampId] =
             log_surgeon::cTokenFirstTimestamp;
     lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenNewlineTimestampId] =
             log_surgeon::cTokenNewlineTimestamp;
+    lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenIntId] = log_surgeon::cTokenInt;
+    lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenFloatId] = log_surgeon::cTokenFloat;
     lexer.m_id_symbol[(int) log_surgeon::SymbolID::TokenNewlineId] = log_surgeon::cTokenNewline;
 
     /// TODO: figure out why this needs to be specially added
diff --git a/components/core/src/clp/CommandLineArguments.cpp b/components/core/src/clp/CommandLineArguments.cpp
index e5c7c415b..5d12b4257 100644
--- a/components/core/src/clp/CommandLineArguments.cpp
+++ b/components/core/src/clp/CommandLineArguments.cpp
@@ -161,6 +161,7 @@ namespace clp {
                 po::options_description extraction_positional_options;
                 extraction_positional_options.add_options()
                         ("archives-dir", po::value<string>(&m_archives_dir))
+                        ("ts-file-path", po::value<string>(&m_ts_patterns_file_path))
                         ("output-dir", po::value<string>(&m_output_dir))
                         ("paths", po::value< vector<string> >(&m_input_paths)->composing())
                         ;
@@ -208,10 +209,12 @@ namespace clp {
                 // Define compression hidden positional options
                 po::options_description compression_positional_options;
                 compression_positional_options.add_options()
+                        ("ts-file-path", po::value<string>(&m_ts_patterns_file_path))
                         ("output-dir", po::value<string>(&m_output_dir))
                         ("input-paths", po::value< vector<string> >(&m_input_paths)->composing())
                         ;
                 po::positional_options_description compression_positional_options_description;
+                compression_positional_options_description.add("ts-file-path", 1);
                 compression_positional_options_description.add("output-dir", 1);
                 compression_positional_options_description.add("input-paths", -1);
 
@@ -257,7 +260,7 @@ namespace clp {
 
                     cerr << "Examples:" << endl;
                     cerr << "  # Compress file1.txt and dir1 into the output dir" << endl;
-                    cerr << "  " << get_program_name() << " c output-dir file1.txt dir1" << endl;
+                    cerr << "  " << get_program_name() << " c ts-file-path output-dir file1.txt dir1" << endl;
                     cerr << endl;
 
                     po::options_description visible_options;
@@ -302,7 +305,19 @@ namespace clp {
                     }
                 }
             }
-
+            // Validate timestamp patterns file  
+            if (m_ts_patterns_file_path.empty()) {
+                throw invalid_argument("Timestamp file (ts-file-path) not specified or empty.");
+            }
+            if (false == boost::filesystem::exists(m_ts_patterns_file_path)) {
+                throw invalid_argument("Specified timestamp file (ts-file-path) '" +
+                m_ts_patterns_file_path + "' does not exist.");
+            }
+            if (false == boost::filesystem::is_regular_file(m_ts_patterns_file_path)) {
+                throw invalid_argument("Specified timestamp file (ts-file-path) '" +
+                m_ts_patterns_file_path + "' is not a regular file.");
+            }
+            
             // Validate an output directory was specified
             if (m_output_dir.empty()) {
                 throw invalid_argument("output-dir not specified or empty.");
diff --git a/components/core/src/clp/CommandLineArguments.hpp b/components/core/src/clp/CommandLineArguments.hpp
index 6d61024ee..09ee3b8d5 100644
--- a/components/core/src/clp/CommandLineArguments.hpp
+++ b/components/core/src/clp/CommandLineArguments.hpp
@@ -33,6 +33,7 @@ namespace clp {
         const std::string& get_path_prefix_to_remove () const { return m_path_prefix_to_remove; }
         const std::string& get_output_dir () const { return m_output_dir; }
         const std::string& get_schema_file_path () const { return m_schema_file_path; }
+        const std::string& get_ts_patterns_file_path () const { return m_ts_patterns_file_path; }
         bool get_use_heuristic () const { return (m_schema_file_path.empty()); }
         bool show_progress () const { return m_show_progress; }
         bool print_archive_stats_progress () const { return m_print_archive_stats_progress; }
@@ -56,6 +57,7 @@ namespace clp {
         std::string m_path_prefix_to_remove;
         std::string m_output_dir;
         std::string m_schema_file_path;
+        std::string m_ts_patterns_file_path;
         bool m_show_progress;
         bool m_print_archive_stats_progress;
         size_t m_target_encoded_file_size;
diff --git a/components/core/src/clp/FileCompressor.cpp b/components/core/src/clp/FileCompressor.cpp
index 45204fbed..170696d45 100644
--- a/components/core/src/clp/FileCompressor.cpp
+++ b/components/core/src/clp/FileCompressor.cpp
@@ -91,7 +91,7 @@ namespace clp {
                                         size_t target_encoded_file_size, const FileToCompress& file_to_compress,
                                         streaming_archive::writer::Archive& archive_writer, bool use_heuristic) {
         std::string file_name = std::filesystem::canonical(file_to_compress.get_path()).string();
-
+        SPDLOG_INFO("Start parsing {}", file_name);
         PROFILER_SPDLOG_INFO("Start parsing {}", file_name)
         Profiler::start_continuous_measurement<Profiler::ContinuousMeasurementIndex::ParseLogFile>();
 
@@ -131,6 +131,7 @@ namespace clp {
         Profiler::stop_continuous_measurement<Profiler::ContinuousMeasurementIndex::ParseLogFile>();
         LOG_CONTINUOUS_MEASUREMENT(Profiler::ContinuousMeasurementIndex::ParseLogFile)
         PROFILER_SPDLOG_INFO("Done parsing {}", file_name)
+        SPDLOG_INFO("Done parsing {}", file_name);
 
         return succeeded;
     }
diff --git a/components/core/src/clp/run.cpp b/components/core/src/clp/run.cpp
index f5912ec3d..bc785f4b3 100644
--- a/components/core/src/clp/run.cpp
+++ b/components/core/src/clp/run.cpp
@@ -12,6 +12,7 @@
 
 // Project headers
 #include "../Profiler.hpp"
+#include "../TimestampPatternsFileParser.hpp"
 #include "../Utils.hpp"
 #include "CommandLineArguments.hpp"
 #include "compression.hpp"
@@ -35,8 +36,7 @@ namespace clp {
             return -1;
         }
         Profiler::init();
-        TimestampPattern::init();
-
+        
         clp::CommandLineArguments command_line_args("clp");
         auto parsing_result = command_line_args.parse_arguments(argc, argv);
         switch (parsing_result) {
@@ -48,7 +48,13 @@ namespace clp {
                 // Continue processing
                 break;
         }
-
+        
+        const std::string& ts_patterns_file_path =
+                command_line_args.get_ts_patterns_file_path();
+        std::vector<TimestampPattern> timestamp_patterns =
+                TimestampPatternsFileParser::try_timestamp_patterns_file(ts_patterns_file_path);
+        TimestampPattern::init(timestamp_patterns);
+        
         vector<string> input_paths = command_line_args.get_input_paths();
 
         Profiler::start_continuous_measurement<Profiler::ContinuousMeasurementIndex::Compression>();
@@ -65,7 +71,12 @@ namespace clp {
             std::unique_ptr<log_surgeon::ReaderParser> reader_parser;
             if (!command_line_args.get_use_heuristic()) {
                 const std::string& schema_file_path = command_line_args.get_schema_file_path();
-                reader_parser = std::make_unique<log_surgeon::ReaderParser>(schema_file_path);
+                log_surgeon::Schema schema(schema_file_path);
+                // TODO: give an error if timestamp is specified in schema file 
+                for(TimestampPattern timestamp_pattern : timestamp_patterns) {
+                    schema.add_variable("timestamp", timestamp_pattern.get_regex(), 0);
+                }
+                reader_parser = std::make_unique<log_surgeon::ReaderParser>(schema);
             }
 
             boost::filesystem::path path_prefix_to_remove(command_line_args.get_path_prefix_to_remove());
diff --git a/components/core/src/streaming_archive/reader/File.cpp b/components/core/src/streaming_archive/reader/File.cpp
index 8b3ac80ca..1a844585b 100644
--- a/components/core/src/streaming_archive/reader/File.cpp
+++ b/components/core/src/streaming_archive/reader/File.cpp
@@ -69,7 +69,7 @@ namespace streaming_archive::reader {
             m_timestamp_patterns.emplace_back(
                     std::piecewise_construct,
                     std::forward_as_tuple(msg_num),
-                    forward_as_tuple(num_spaces_before_ts, timestamp_format));
+                    forward_as_tuple(num_spaces_before_ts, timestamp_format, ""));
         }
 
         m_num_messages = file_metadata_ix.get_num_messages();
diff --git a/components/core/src/streaming_archive/writer/Archive.cpp b/components/core/src/streaming_archive/writer/Archive.cpp
index 955975852..ac71d978e 100644
--- a/components/core/src/streaming_archive/writer/Archive.cpp
+++ b/components/core/src/streaming_archive/writer/Archive.cpp
@@ -292,8 +292,7 @@ namespace streaming_archive::writer {
             size_t start;
             size_t end;
             timestamp_pattern = (TimestampPattern*) TimestampPattern::search_known_ts_patterns(
-                    log_view.get_log_output_buffer()->get_mutable_token(0).to_string(), timestamp, 
-                    start, end);
+                    log_view.get_timestamp()->to_string(), timestamp, start, end);
             if (m_old_ts_pattern != *timestamp_pattern) {
                 change_ts_pattern(timestamp_pattern);
                 m_old_ts_pattern = *timestamp_pattern;
diff --git a/components/core/tests/test_log_files/log.txt b/components/core/tests/test_log_files/log.txt
index f514ec82d..42c4240f4 100644
--- a/components/core/tests/test_log_files/log.txt
+++ b/components/core/tests/test_log_files/log.txt
@@ -1,3 +1,13 @@
+	... 52 more
+2016-05-08 07:34:05.255 Some Static Text Then MyDog123 APet4123\test.txt Then 123 then 123.123
+
+75
+
+test test 90061999ms one day one hour one minute one second rest milliseconds
+999 rest milliseconds
+1999 one second rest milliseconds
+61999 one minute one second rest milliseconds
+3661999 one hour one minute one second rest milliseconds
 2016-05-08 07:34:05.251 MyDog123 APet4123\test.txt
 2016-05-08 07:34:05.252 statictext123
 2016-05-08 07:34:05.253 123
diff --git a/components/core/tests/test_time_format_files/default_formats.txt b/components/core/tests/test_time_format_files/default_formats.txt
index 09245ab8b..05afab9fe 100644
--- a/components/core/tests/test_time_format_files/default_formats.txt
+++ b/components/core/tests/test_time_format_files/default_formats.txt
@@ -51,9 +51,11 @@
 0:%b %d %H:%M:%S
 # E.g. 01-21 11:56:42.392
 0:%m-%d %H:%M:%S.%3
-# E.g. 925123679
-0:%rn
-# E.g. 925123679ns
-0:%rnns
+# E.g. 3661999
+0:%r
+# E.g. 3661999ms
+0:%rms
+# E.g. INFO [main] 3661999ms
+2:%rms
 # This is not allowed: 0:%rs:%3 E.g. 925:913
 # This is not allowed: 0:%rs913 E.g. 925913