From 6571d8d067a5432fb0a6cfc5e85d3e1e3e9fbc1f Mon Sep 17 00:00:00 2001
From: Sharaf Mohamed
Date: Wed, 6 Dec 2023 03:53:42 -0500
Subject: [PATCH] Replace compressor_frontend with log-surgeon submodule.
 (#131)

---
 .gitmodules                                   |   3 +
 components/core/.clang-format                 |   2 +-
 components/core/CMakeLists.txt                |  99 +--
 components/core/README-Schema.md              |   4 +-
 components/core/README.md                     |   2 +-
 components/core/cmake/utils.cmake             |   5 +
 components/core/config/schemas.txt            |   4 +-
 components/core/src/Grep.cpp                  | 131 +++-
 components/core/src/Grep.hpp                  |  33 +-
 components/core/src/LogSurgeonReader.cpp      |  12 +
 components/core/src/LogSurgeonReader.hpp      |  19 +
 components/core/src/Utils.cpp                 | 146 ++++
 components/core/src/Utils.hpp                 |  14 +
 components/core/src/clg/clg.cpp               |  42 +-
 components/core/src/clo/clo.cpp               |   8 +-
 components/core/src/clp/FileCompressor.cpp    |  69 +-
 components/core/src/clp/FileCompressor.hpp    |  27 +-
 components/core/src/clp/compression.cpp       |  14 +-
 components/core/src/clp/compression.hpp       |  19 +-
 components/core/src/clp/run.cpp               |  20 +-
 .../src/compressor_frontend/Constants.hpp     |  42 --
 .../src/compressor_frontend/LALR1Parser.cpp   |  14 -
 .../src/compressor_frontend/LALR1Parser.hpp   | 421 -----------
 .../src/compressor_frontend/LALR1Parser.inc   | 689 ------------------
 .../core/src/compressor_frontend/Lexer.hpp    | 199 -----
 .../core/src/compressor_frontend/Lexer.inc    | 541 --------------
 .../src/compressor_frontend/LogParser.cpp     | 218 ------
 .../src/compressor_frontend/LogParser.hpp     |  70 --
 .../src/compressor_frontend/SchemaParser.cpp  | 463 ------------
 .../src/compressor_frontend/SchemaParser.hpp  | 118 ---
 .../core/src/compressor_frontend/Token.cpp    |  31 -
 .../core/src/compressor_frontend/Token.hpp    |  52 --
 .../finite_automata/RegexAST.hpp              | 449 ------------
 .../finite_automata/RegexAST.inc              | 262 -------
 .../finite_automata/RegexDFA.hpp              |  86 ---
 .../finite_automata/RegexDFA.inc              |  41 --
 .../finite_automata/RegexNFA.hpp              | 140 ----
 .../finite_automata/RegexNFA.inc              | 188 -----
 .../finite_automata/UnicodeIntervalTree.hpp   | 186 -----
 .../finite_automata/UnicodeIntervalTree.inc   | 231 ------
 .../core/src/compressor_frontend/utils.cpp    | 120 ---
 .../core/src/compressor_frontend/utils.hpp    |  21 -
 .../src/streaming_archive/writer/Archive.cpp  |  80 +-
 .../src/streaming_archive/writer/Archive.hpp  |  19 +-
 components/core/submodules/log-surgeon        |   1 +
 components/core/tests/test-Grep.cpp           |  23 +-
 .../core/tests/test-ParserWithUserSchema.cpp  | 154 ++--
 components/core/tests/test_log_files/log.txt  |   5 +-
 .../colon_missing_schema.txt                  |   2 +-
 .../tests/test_schema_files/real_schema.txt   |   2 +-
 .../schema_with_delimiter_in_regex_error.txt  |   2 +-
 .../schema_with_delimiters.txt                |   2 +-
 ...schema_with_multicharacter_token_error.txt |   2 +-
 .../schema_without_delimiters.txt             |   2 +-
 .../tests/test_schema_files/search_schema.txt |   2 +-
 .../clp-env-base-centos7.4/Dockerfile         |   4 +-
 .../clp-env-base-ubuntu-focal/Dockerfile      |   6 +
 .../scripts/lib_install/centos7.4/README.md   |   4 +-
 .../centos7.4/install-packages-from-source.sh |   4 +-
 .../centos7.4/install-prebuilt-packages.sh    |   2 +-
 .../ubuntu-focal/install-prebuilt-packages.sh |   2 +
 .../src/etc/clp-schema.template.txt           |   2 +-
 62 files changed, 658 insertions(+), 4917 deletions(-)
 create mode 100644 components/core/src/LogSurgeonReader.cpp
 create mode 100644 components/core/src/LogSurgeonReader.hpp
 delete mode 100644 components/core/src/compressor_frontend/Constants.hpp
 delete mode 100644 components/core/src/compressor_frontend/LALR1Parser.cpp
 delete mode 100644 components/core/src/compressor_frontend/LALR1Parser.hpp
 delete mode 100644 components/core/src/compressor_frontend/LALR1Parser.inc
 delete mode 100644 components/core/src/compressor_frontend/Lexer.hpp
 delete mode 100644 components/core/src/compressor_frontend/Lexer.inc
 delete mode 100644 components/core/src/compressor_frontend/LogParser.cpp
 delete mode 100644 components/core/src/compressor_frontend/LogParser.hpp
 delete mode 100644 components/core/src/compressor_frontend/SchemaParser.cpp
 delete mode 100644 components/core/src/compressor_frontend/SchemaParser.hpp
 delete mode 100644 components/core/src/compressor_frontend/Token.cpp
 delete mode 100644 components/core/src/compressor_frontend/Token.hpp
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexAST.hpp
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexAST.inc
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexDFA.hpp
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexDFA.inc
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexNFA.hpp
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/RegexNFA.inc
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
 delete mode 100644 components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
 delete mode 100644 components/core/src/compressor_frontend/utils.cpp
 delete mode 100644 components/core/src/compressor_frontend/utils.hpp
 create mode 160000 components/core/submodules/log-surgeon

diff --git a/.gitmodules b/.gitmodules
index ba28584e2..4b3b13551 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -11,6 +11,9 @@
 [submodule "components/core/submodules/yaml-cpp"]
     path = components/core/submodules/yaml-cpp
     url = https://github.com/jbeder/yaml-cpp.git
+[submodule "components/core/submodules/log-surgeon"]
+    path = components/core/submodules/log-surgeon
+    url = https://github.com/y-scope/log-surgeon.git
 [submodule "components/core/submodules/boost-outcome"]
     path = components/core/submodules/boost-outcome
     url = https://github.com/boostorg/outcome.git
diff --git a/components/core/.clang-format b/components/core/.clang-format
index 42f194fdb..ce26532e7 100644
--- a/components/core/.clang-format
+++ b/components/core/.clang-format
@@ -68,7 +68,7 @@ IncludeBlocks: Regroup
 IncludeCategories:
   # NOTE: A header is grouped by first matching regex
   # Third-party headers. Update when adding new third-party libraries.
-  - Regex: '^<(archive|boost|catch2|date|fmt|json|mariadb|spdlog|sqlite3|yaml-cpp|zstd)'
+  - Regex: '^<(archive|boost|catch2|date|fmt|json|log_surgeon|mariadb|spdlog|sqlite3|yaml-cpp|zstd)'
     Priority: 3
   # C system headers
   - Regex: '^<.+.h>'
diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt
index 503d8f122..f44cc9ad6 100644
--- a/components/core/CMakeLists.txt
+++ b/components/core/CMakeLists.txt
@@ -128,6 +128,9 @@ else()
     message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for LibArchive")
 endif()
 
+# Add log surgeon
+add_subdirectory(submodules/log-surgeon EXCLUDE_FROM_ALL)
+
 # Find and setup MariaDBClient library
 if(CLP_USE_STATIC_LIBS)
     # NOTE: We can't statically link to MariaDBClient since it's GPL
@@ -193,28 +196,6 @@ set(SOURCE_FILES_clp
         src/clp/StructuredFileToCompress.hpp
         src/clp/utils.cpp
         src/clp/utils.hpp
-        src/compressor_frontend/Constants.hpp
-        src/compressor_frontend/finite_automata/RegexAST.hpp
-        src/compressor_frontend/finite_automata/RegexAST.inc
-        src/compressor_frontend/finite_automata/RegexDFA.hpp
-        src/compressor_frontend/finite_automata/RegexDFA.inc
-        src/compressor_frontend/finite_automata/RegexNFA.hpp
-        src/compressor_frontend/finite_automata/RegexNFA.inc
-        src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
-        src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
-        src/compressor_frontend/LALR1Parser.cpp
-        src/compressor_frontend/LALR1Parser.hpp
-        src/compressor_frontend/LALR1Parser.inc
-        src/compressor_frontend/Lexer.hpp
-        src/compressor_frontend/Lexer.inc
-        src/compressor_frontend/LogParser.cpp
-        src/compressor_frontend/LogParser.hpp
-        src/compressor_frontend/SchemaParser.cpp
-        src/compressor_frontend/SchemaParser.hpp
-        src/compressor_frontend/Token.cpp
-        src/compressor_frontend/Token.hpp
-        src/compressor_frontend/utils.cpp
-        src/compressor_frontend/utils.hpp
         src/database_utils.cpp
         src/database_utils.hpp
         src/Defs.h
@@ -262,6 +243,8 @@ set(SOURCE_FILES_clp
         src/LibarchiveFileReader.hpp
         src/LibarchiveReader.cpp
         src/LibarchiveReader.hpp
+        src/LogSurgeonReader.cpp
+        src/LogSurgeonReader.hpp
         src/LogTypeDictionaryEntry.cpp
         src/LogTypeDictionaryEntry.hpp
         src/LogTypeDictionaryReader.cpp
@@ -360,6 +343,7 @@ target_link_libraries(clp
        PRIVATE
        Boost::filesystem Boost::iostreams Boost::program_options
        fmt::fmt
+       log_surgeon::log_surgeon
        spdlog::spdlog
        ${sqlite_LIBRARY_DEPENDENCIES}
        LibArchive::LibArchive
@@ -384,26 +368,6 @@ set(SOURCE_FILES_clg
        src/clg/clg.cpp
        src/clg/CommandLineArguments.cpp
        src/clg/CommandLineArguments.hpp
-       src/compressor_frontend/Constants.hpp
-       src/compressor_frontend/finite_automata/RegexAST.hpp
-       src/compressor_frontend/finite_automata/RegexAST.inc
-       src/compressor_frontend/finite_automata/RegexDFA.hpp
-       src/compressor_frontend/finite_automata/RegexDFA.inc
-       src/compressor_frontend/finite_automata/RegexNFA.hpp
-       src/compressor_frontend/finite_automata/RegexNFA.inc
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
-       src/compressor_frontend/LALR1Parser.cpp
-       src/compressor_frontend/LALR1Parser.hpp
-       src/compressor_frontend/LALR1Parser.inc
-       src/compressor_frontend/Lexer.hpp
-       src/compressor_frontend/Lexer.inc
-       src/compressor_frontend/SchemaParser.cpp
-       src/compressor_frontend/SchemaParser.hpp
-       src/compressor_frontend/Token.cpp
-       src/compressor_frontend/Token.hpp
-       src/compressor_frontend/utils.cpp
-       src/compressor_frontend/utils.hpp
        src/database_utils.cpp
        src/database_utils.hpp
        src/Defs.h
@@ -439,6 +403,8 @@ set(SOURCE_FILES_clg
        src/ir/LogEvent.hpp
        src/ir/parsing.cpp
        src/ir/parsing.hpp
+       src/LogSurgeonReader.cpp
+       src/LogSurgeonReader.hpp
        src/ir/parsing.inc
        src/LogTypeDictionaryEntry.cpp
        src/LogTypeDictionaryEntry.hpp
@@ -532,6 +498,7 @@ target_link_libraries(clg
        Boost::filesystem Boost::iostreams Boost::program_options
        fmt::fmt
        KQL
+       log_surgeon::log_surgeon
        MariaDBClient::MariaDBClient
        spdlog::spdlog
        ${sqlite_LIBRARY_DEPENDENCIES}
@@ -551,26 +518,6 @@ set(SOURCE_FILES_clo
        src/clo/CommandLineArguments.hpp
        src/clo/ControllerMonitoringThread.cpp
        src/clo/ControllerMonitoringThread.hpp
-       src/compressor_frontend/Constants.hpp
-       src/compressor_frontend/finite_automata/RegexAST.hpp
-       src/compressor_frontend/finite_automata/RegexAST.inc
-       src/compressor_frontend/finite_automata/RegexDFA.hpp
-       src/compressor_frontend/finite_automata/RegexDFA.inc
-       src/compressor_frontend/finite_automata/RegexNFA.hpp
-       src/compressor_frontend/finite_automata/RegexNFA.inc
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
-       src/compressor_frontend/LALR1Parser.cpp
-       src/compressor_frontend/LALR1Parser.hpp
-       src/compressor_frontend/LALR1Parser.inc
-       src/compressor_frontend/Lexer.hpp
-       src/compressor_frontend/Lexer.inc
-       src/compressor_frontend/SchemaParser.cpp
-       src/compressor_frontend/SchemaParser.hpp
-       src/compressor_frontend/Token.cpp
-       src/compressor_frontend/Token.hpp
-       src/compressor_frontend/utils.cpp
-       src/compressor_frontend/utils.hpp
        src/database_utils.cpp
        src/database_utils.hpp
        src/Defs.h
@@ -598,6 +545,8 @@ set(SOURCE_FILES_clo
        src/ir/LogEvent.hpp
        src/ir/parsing.cpp
        src/ir/parsing.hpp
+       src/LogSurgeonReader.cpp
+       src/LogSurgeonReader.hpp
        src/ir/parsing.inc
        src/LogTypeDictionaryEntry.cpp
        src/LogTypeDictionaryEntry.hpp
@@ -690,6 +639,7 @@ target_link_libraries(clo
        PRIVATE
        Boost::filesystem Boost::iostreams Boost::program_options
        fmt::fmt
+       log_surgeon::log_surgeon
        msgpack-cxx
        spdlog::spdlog
        ${sqlite_LIBRARY_DEPENDENCIES}
@@ -723,28 +673,6 @@ set(SOURCE_FILES_unitTest
        src/clp/StructuredFileToCompress.hpp
        src/clp/utils.cpp
        src/clp/utils.hpp
-       src/compressor_frontend/Constants.hpp
-       src/compressor_frontend/finite_automata/RegexAST.hpp
-       src/compressor_frontend/finite_automata/RegexAST.inc
-       src/compressor_frontend/finite_automata/RegexDFA.hpp
-       src/compressor_frontend/finite_automata/RegexDFA.inc
-       src/compressor_frontend/finite_automata/RegexNFA.hpp
-       src/compressor_frontend/finite_automata/RegexNFA.inc
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
-       src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
-       src/compressor_frontend/LALR1Parser.cpp
-       src/compressor_frontend/LALR1Parser.hpp
-       src/compressor_frontend/LALR1Parser.inc
-       src/compressor_frontend/Lexer.hpp
-       src/compressor_frontend/Lexer.inc
-       src/compressor_frontend/LogParser.cpp
-       src/compressor_frontend/LogParser.hpp
-       src/compressor_frontend/SchemaParser.cpp
-       src/compressor_frontend/SchemaParser.hpp
-       src/compressor_frontend/Token.cpp
-       src/compressor_frontend/Token.hpp
-       src/compressor_frontend/utils.cpp
-       src/compressor_frontend/utils.hpp
        src/database_utils.cpp
        src/database_utils.hpp
        src/Defs.h
@@ -809,6 +737,8 @@ set(SOURCE_FILES_unitTest
        src/LibarchiveFileReader.hpp
        src/LibarchiveReader.cpp
        src/LibarchiveReader.hpp
+       src/LogSurgeonReader.cpp
+       src/LogSurgeonReader.hpp
        src/LogTypeDictionaryEntry.cpp
        src/LogTypeDictionaryEntry.hpp
        src/LogTypeDictionaryReader.cpp
@@ -925,6 +855,7 @@ target_link_libraries(unitTest
        PRIVATE
        Boost::filesystem Boost::iostreams Boost::program_options
        fmt::fmt
+       log_surgeon::log_surgeon
        LibArchive::LibArchive
        MariaDBClient::MariaDBClient
        spdlog::spdlog
diff --git a/components/core/README-Schema.md b/components/core/README-Schema.md
index ac59ca2ab..6644abd66 100644
--- a/components/core/README-Schema.md
+++ b/components/core/README-Schema.md
@@ -17,7 +17,7 @@ delimiters: \t\r\n:,!;%
 timestamp:\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3}){0,1}
 timestamp:\[\d{8}\-\d{2}:\d{2}:\d{2}\]
 int:\-{0,1}[0-9]+
-double:\-{0,1}[0-9]+\.[0-9]+
+float:\-{0,1}[0-9]+\.[0-9]+
 
 // Custom variables
 hex:[a-fA-F]+
@@ -49,7 +49,7 @@ equals:.*=.*[a-zA-Z0-9].*
   start of the file then a newline is used to indicate the beginning of a new
   log message. Timestamp patterns are not matched midline and are not stored as
   dictionary variables as they may contain delimiters.
-* `int` and `double` are keywords. These are encoded specially for compression
+* `int` and `float` are keywords. These are encoded specially for compression
   performance.
 
 ## Supported Regex
diff --git a/components/core/README.md b/components/core/README.md
index 5e0221d1b..6820d311c 100644
--- a/components/core/README.md
+++ b/components/core/README.md
@@ -22,7 +22,7 @@ CLP core is the low-level component that performs compression, decompression, an
 * We have built and tested CLP on the OSes listed
   [below](https://github.com/y-scope/clp/tree/main/components/core#native-environment).
 * If you have trouble building for another OS, file an issue, and we may be able to help.
-* A compiler that supports C++17 (e.g., gcc-8)
+* A compiler that supports C++17 and std::span (e.g., gcc-10)
 
 ## Building
 
diff --git a/components/core/cmake/utils.cmake b/components/core/cmake/utils.cmake
index f2bb940ce..d6aefa160 100644
--- a/components/core/cmake/utils.cmake
+++ b/components/core/cmake/utils.cmake
@@ -41,9 +41,14 @@ set(SOURCE_FILES_make-dictionaries-readable
         ${CMAKE_CURRENT_SOURCE_DIR}/submodules/date/include/date/date.h
         )
 add_executable(make-dictionaries-readable ${SOURCE_FILES_make-dictionaries-readable})
+target_include_directories(make-dictionaries-readable
+        PRIVATE
+        ${CMAKE_SOURCE_DIR}/submodules
+        )
 target_link_libraries(make-dictionaries-readable
         PRIVATE
         Boost::filesystem Boost::iostreams Boost::program_options
+        log_surgeon::log_surgeon
         spdlog::spdlog
         ZStd::ZStd
         )
diff --git a/components/core/config/schemas.txt b/components/core/config/schemas.txt
index 2965a3d8f..e0b777859 100644
--- a/components/core/config/schemas.txt
+++ b/components/core/config/schemas.txt
@@ -9,9 +9,9 @@ timestamp:\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3}){0,1}
 // E.g. [20150131-15:50:45]
 timestamp:\[\d{8}\-\d{2}:\d{2}:\d{2}\]
 
-// Specially-encoded variables (using the `int` and `double` keywords)
+// Specially-encoded variables (using the `int` and `float` keywords)
 int:\-{0,1}[0-9]+
-double:\-{0,1}[0-9]+\.[0-9]+
+float:\-{0,1}[0-9]+\.[0-9]+
 
 // Dictionary variables
 hex:[a-fA-F]+
diff --git a/components/core/src/Grep.cpp b/components/core/src/Grep.cpp
index e533a4eea..fbcc151e6 100644
--- a/components/core/src/Grep.cpp
+++ b/components/core/src/Grep.cpp
@@ -3,10 +3,13 @@
 // C++ libraries
 #include
 
+// Log surgeon
+#include <log_surgeon/Constants.hpp>
+
 // Project headers
-#include "compressor_frontend/Constants.hpp"
 #include "EncodedVariableInterpreter.hpp"
 #include "ir/parsing.hpp"
+#include "LogSurgeonReader.hpp"
 #include "StringReader.hpp"
 #include "Utils.hpp"
 
@@ -233,6 +236,16 @@ bool QueryToken::change_to_next_possible_type () {
     }
 }
 
+/**
+ * Wraps the tokens returned from the log_surgeon lexer, and stores the variable
+ * ids of the tokens in a search query in a set. This allows for optimized
+ * search performance.
+ */
+class SearchToken : public log_surgeon::Token {
+public:
+    std::set<int> m_type_ids_set;
+};
+
 // Local prototypes
 /**
  * Process a QueryToken that is definitely a variable
@@ -419,10 +432,17 @@ SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery (const Archiv
     return SubQueryMatchabilityResult::MayMatch;
 }
 
-bool Grep::process_raw_query (const Archive& archive, const string& search_string, epochtime_t search_begin_ts, epochtime_t search_end_ts, bool ignore_case,
-                              Query& query, compressor_frontend::lexers::ByteLexer& forward_lexer, compressor_frontend::lexers::ByteLexer& reverse_lexer,
-                              bool use_heuristic)
-{
+bool Grep::process_raw_query(
+        Archive const& archive,
+        string const& search_string,
+        epochtime_t search_begin_ts,
+        epochtime_t search_end_ts,
+        bool ignore_case,
+        Query& query,
+        log_surgeon::lexers::ByteLexer& forward_lexer,
+        log_surgeon::lexers::ByteLexer& reverse_lexer,
+        bool use_heuristic
+) {
     // Set properties which require no processing
     query.set_search_begin_timestamp(search_begin_ts);
     query.set_search_end_timestamp(search_end_ts);
@@ -437,22 +457,24 @@ bool Grep::process_raw_query (const Archive& archive, const string& search_strin
     processed_search_string = clean_up_wildcard_search_string(processed_search_string);
     query.set_search_string(processed_search_string);
 
-    // Replace non-greedy wildcards with greedy wildcards since we currently have no support for searching compressed files with non-greedy wildcards
-    std::replace(processed_search_string.begin(), processed_search_string.end(), '?', '*');
-    // Clean-up in case any instances of "?*" or "*?" were changed into "**"
-    processed_search_string = clean_up_wildcard_search_string(processed_search_string);
-
     // Split search_string into tokens with wildcards
     vector<QueryToken> query_tokens;
     size_t begin_pos = 0;
     size_t end_pos = 0;
     bool is_var;
     if (use_heuristic) {
+        // Replace non-greedy wildcards with greedy wildcards since we currently
+        // have no support for searching compressed files with non-greedy
+        // wildcards
+        std::replace(processed_search_string.begin(), processed_search_string.end(), '?', '*');
+        // Clean-up in case any instances of "?*" or "*?" were changed into "**"
+        processed_search_string = clean_up_wildcard_search_string(processed_search_string);
         while (get_bounds_of_next_potential_var(processed_search_string, begin_pos, end_pos, is_var)) {
             query_tokens.emplace_back(processed_search_string, begin_pos, end_pos, is_var);
         }
     } else {
-        while (get_bounds_of_next_potential_var(processed_search_string, begin_pos, end_pos, is_var, forward_lexer, reverse_lexer)) {
+        while (get_bounds_of_next_potential_var(processed_search_string, begin_pos, end_pos, is_var,
+                                                forward_lexer, reverse_lexer)) {
             query_tokens.emplace_back(processed_search_string, begin_pos, end_pos, is_var);
         }
     }
@@ -621,9 +643,14 @@ bool Grep::get_bounds_of_next_potential_var (const string& value, size_t& begin_
     return (value_length != begin_pos);
 }
 
-bool
-Grep::get_bounds_of_next_potential_var (const string& value, size_t& begin_pos, size_t& end_pos, bool& is_var,
-                                        compressor_frontend::lexers::ByteLexer& forward_lexer, compressor_frontend::lexers::ByteLexer& reverse_lexer) {
+bool Grep::get_bounds_of_next_potential_var(
+        string const& value,
+        size_t& begin_pos,
+        size_t& end_pos,
+        bool& is_var,
+        log_surgeon::lexers::ByteLexer& forward_lexer,
+        log_surgeon::lexers::ByteLexer& reverse_lexer
+) {
     const size_t value_length = value.length();
     if (end_pos >= value_length) {
         return false;
@@ -699,35 +726,59 @@ Grep::get_bounds_of_next_potential_var (const string& value, size_t& begin_pos,
                 break;
             }
         }
+        SearchToken search_token;
         if (has_wildcard_in_middle || (has_prefix_wildcard && has_suffix_wildcard)) {
             // DO NOTHING
-        } else if (has_suffix_wildcard) { //asdsas*
-            StringReader stringReader;
-            stringReader.open(value.substr(begin_pos, end_pos - begin_pos - 1));
-            forward_lexer.reset(stringReader);
-            compressor_frontend::Token token = forward_lexer.scan_with_wildcard(value[end_pos - 1]);
-            if (token.m_type_ids->at(0) != (int) compressor_frontend::SymbolID::TokenUncaughtStringID &&
-                token.m_type_ids->at(0) != (int) compressor_frontend::SymbolID::TokenEndID) {
-                is_var = true;
-            }
-        } else if (has_prefix_wildcard) { // *asdas
-            std::string value_reverse = value.substr(begin_pos + 1, end_pos - begin_pos - 1);
-            std::reverse(value_reverse.begin(), value_reverse.end());
-            StringReader stringReader;
-            stringReader.open(value_reverse);
-            reverse_lexer.reset(stringReader);
-            compressor_frontend::Token token = reverse_lexer.scan_with_wildcard(value[begin_pos]);
-            if (token.m_type_ids->at(0) != (int) compressor_frontend::SymbolID::TokenUncaughtStringID &&
-                token.m_type_ids->at(0) != (int)compressor_frontend::SymbolID::TokenEndID) {
-                is_var = true;
+        } else {
+            StringReader string_reader;
+            LogSurgeonReader reader_wrapper(string_reader);
+            log_surgeon::ParserInputBuffer parser_input_buffer;
+            if (has_suffix_wildcard) {  // text*
+                // TODO: Opening a StringReader on a substring just to read it
+                // into the ParserInputBuffer is a convoluted way to copy a
+                // string; this should be improved when a SearchParser is added
+                // to log_surgeon
+                string_reader.open(value.substr(begin_pos, end_pos - begin_pos - 1));
+                parser_input_buffer.read_if_safe(reader_wrapper);
+                forward_lexer.reset();
+                forward_lexer.scan_with_wildcard(
+                        parser_input_buffer,
+                        value[end_pos - 1],
+                        search_token
+                );
+            } else if (has_prefix_wildcard) {  // *text
+                std::string value_reverse
+                        = value.substr(begin_pos + 1, end_pos - begin_pos - 1);
+                std::reverse(value_reverse.begin(), value_reverse.end());
+                string_reader.open(value_reverse);
+                parser_input_buffer.read_if_safe(reader_wrapper);
+                reverse_lexer.reset();
+                reverse_lexer.scan_with_wildcard(
+                        parser_input_buffer,
+                        value[begin_pos],
+                        search_token
+                );
+            } else {  // no wildcards
+                string_reader.open(value.substr(begin_pos, end_pos - begin_pos));
+                parser_input_buffer.read_if_safe(reader_wrapper);
+                forward_lexer.reset();
+                forward_lexer.scan(parser_input_buffer, search_token);
+                search_token.m_type_ids_set.insert(search_token.m_type_ids_ptr->at(0));
             }
-        } else { // no wildcards
-            StringReader stringReader;
-            stringReader.open(value.substr(begin_pos, end_pos - begin_pos));
-            forward_lexer.reset(stringReader);
-            compressor_frontend::Token token = forward_lexer.scan();
-            if (token.m_type_ids->at(0) != (int) compressor_frontend::SymbolID::TokenUncaughtStringID &&
-                token.m_type_ids->at(0) != (int) compressor_frontend::SymbolID::TokenEndID) {
+            // TODO: use a set so it's faster
+            // auto const& set = search_token.m_type_ids_set;
+            // if (set.find(static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID))
+            //     == set.end()
+            //     && set.find(static_cast<int>(log_surgeon::SymbolID::TokenEndID))
+            //     == set.end())
+            // {
+            //     is_var = true;
+            // }
+            auto const& type = search_token.m_type_ids_ptr->at(0);
+            if (type != static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID)
+                && type != static_cast<int>(log_surgeon::SymbolID::TokenEndID))
+            {
                 is_var = true;
             }
         }
diff --git a/components/core/src/Grep.hpp b/components/core/src/Grep.hpp
index 68225eb1b..ece1e62d9 100644
--- a/components/core/src/Grep.hpp
+++ b/components/core/src/Grep.hpp
@@ -4,12 +4,14 @@
 // C++ libraries
 #include
 
+// Log surgeon
+#include <log_surgeon/Lexer.hpp>
+
 // Project headers
 #include "Defs.h"
 #include "Query.hpp"
 #include "streaming_archive/reader/Archive.hpp"
 #include "streaming_archive/reader/File.hpp"
-#include "compressor_frontend/Lexer.hpp"
 
 class Grep {
 
@@ -34,11 +36,23 @@ class Grep {
      * @param search_end_ts
      * @param ignore_case
      * @param query
+     * @param forward_lexer DFA for determining if input is in the schema
+     * @param reverse_lexer DFA for determining if reverse of input is in the
+     * schema
+     * @param use_heuristic
      * @return true if query may match messages, false otherwise
      */
-    static bool process_raw_query (const streaming_archive::reader::Archive& archive, const std::string& search_string, epochtime_t search_begin_ts,
-                                   epochtime_t search_end_ts, bool ignore_case, Query& query, compressor_frontend::lexers::ByteLexer& forward_lexer,
-                                   compressor_frontend::lexers::ByteLexer& reverse_lexer, bool use_heuristic);
+    static bool process_raw_query(
+            streaming_archive::reader::Archive const& archive,
+            std::string const& search_string,
+            epochtime_t search_begin_ts,
+            epochtime_t search_end_ts,
+            bool ignore_case,
+            Query& query,
+            log_surgeon::lexers::ByteLexer& forward_lexer,
+            log_surgeon::lexers::ByteLexer& reverse_lexer,
+            bool use_heuristic
+    );
 
     /**
     * Returns bounds of next potential variable (either a definite variable or a token with wildcards)
@@ -60,9 +74,14 @@ class Grep {
      * @param reverse_lexer DFA for determining if reverse of input is in the schema
      * @return true if another potential variable was found, false otherwise
      */
-    static bool get_bounds_of_next_potential_var (const std::string& value, size_t& begin_pos, size_t& end_pos, bool& is_var,
-                                                  compressor_frontend::lexers::ByteLexer& forward_lexer, compressor_frontend::lexers::ByteLexer& reverse_lexer);
-
+    static bool get_bounds_of_next_potential_var(
+            std::string const& value,
+            size_t& begin_pos,
+            size_t& end_pos,
+            bool& is_var,
+            log_surgeon::lexers::ByteLexer& forward_lexer,
+            log_surgeon::lexers::ByteLexer& reverse_lexer
+    );
     /**
      * Marks which sub-queries in each query are relevant to the given file
      * @param compressed_file
diff --git a/components/core/src/LogSurgeonReader.cpp b/components/core/src/LogSurgeonReader.cpp
new file mode 100644
index 000000000..e3d0e7a12
--- /dev/null
+++ b/components/core/src/LogSurgeonReader.cpp
@@ -0,0 +1,12 @@
+#include "LogSurgeonReader.hpp"
+
+LogSurgeonReader::LogSurgeonReader(ReaderInterface& reader_interface)
+        : m_reader_interface(reader_interface) {
+    read = [this](char* buf, size_t count, size_t& read_to) -> log_surgeon::ErrorCode {
+        m_reader_interface.read(buf, count, read_to);
+        if (read_to == 0) {
+            return log_surgeon::ErrorCode::EndOfFile;
+        }
+        return log_surgeon::ErrorCode::Success;
+    };
+}
diff --git a/components/core/src/LogSurgeonReader.hpp b/components/core/src/LogSurgeonReader.hpp
new file mode 100644
index 000000000..82e762bf9
--- /dev/null
+++ b/components/core/src/LogSurgeonReader.hpp
@@ -0,0 +1,19 @@
+#ifndef LOG_SURGEON_READER_HPP
+#define LOG_SURGEON_READER_HPP
+
+#include <log_surgeon/Reader.hpp>
+
+#include "ReaderInterface.hpp"
+
+/*
+ * Wrapper providing a read function that works with the parsers in
+ * log_surgeon.
+ */
+class LogSurgeonReader : public log_surgeon::Reader {
+public:
+    LogSurgeonReader(ReaderInterface& reader_interface);
+
+private:
+    ReaderInterface& m_reader_interface;
+};
+
+#endif // LOG_SURGEON_READER_HPP
diff --git a/components/core/src/Utils.cpp b/components/core/src/Utils.cpp
index a0b226fee..f3dd17276 100644
--- a/components/core/src/Utils.cpp
+++ b/components/core/src/Utils.cpp
@@ -14,6 +14,12 @@
 #include
 #include
 
+// spdlog
+#include <spdlog/spdlog.h>
+
+// Log surgeon
+#include <log_surgeon/SchemaParser.hpp>
+
 // Project headers
 #include "spdlog_with_specializations.hpp"
 #include "string_utils.hpp"
@@ -164,3 +170,143 @@ ErrorCode read_list_of_paths (const string& list_path, vector<string>& paths) {
 
     return ErrorCode_Success;
 }
+
+// TODO: duplicates code in log_surgeon/parser.tpp, should implement a
+// SearchParser in log_surgeon instead and use it here. Specifically,
+// initialization of lexer.m_symbol_id, contains_delimiter error, and add_rule
+// logic.
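+// Illustrative usage sketch (hypothetical comment, not verbatim from any call
+// site; the concrete call sites are in clg.cpp and clo.cpp below): callers
+// build a forward/reverse lexer pair from the same schema file, where
+// `schema_file_path` is the archive's schema file:
+//
+//     log_surgeon::lexers::ByteLexer forward_lexer;
+//     load_lexer_from_file(schema_file_path, false, forward_lexer);
+//     log_surgeon::lexers::ByteLexer reverse_lexer;
+//     load_lexer_from_file(schema_file_path, true, reverse_lexer);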
+void load_lexer_from_file(
+        std::string const& schema_file_path,
+        bool reverse,
+        log_surgeon::lexers::ByteLexer& lexer
+) {
+    log_surgeon::SchemaParser sp;
+    std::unique_ptr<log_surgeon::SchemaAST> schema_ast
+            = log_surgeon::SchemaParser::try_schema_file(schema_file_path);
+    if (!lexer.m_symbol_id.empty()) {
+        throw std::runtime_error("Error: symbol_ids initialized before setting enum symbol_ids");
+    }
+
+    // cTokenEnd and cTokenUncaughtString never need to be added as a rule to
+    // the lexer as they are not parsed
+    lexer.m_symbol_id[log_surgeon::cTokenEnd] = static_cast<int>(log_surgeon::SymbolID::TokenEndID);
+    lexer.m_symbol_id[log_surgeon::cTokenUncaughtString]
+            = static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID);
+    // cTokenInt, cTokenFloat, cTokenFirstTimestamp, and cTokenNewlineTimestamp
+    // each have unknown rule(s) until specified by the user so can't be
+    // explicitly added and are done by looping over schema_vars (user schema)
+    lexer.m_symbol_id[log_surgeon::cTokenInt] = static_cast<int>(log_surgeon::SymbolID::TokenIntId);
+    lexer.m_symbol_id[log_surgeon::cTokenFloat]
+            = static_cast<int>(log_surgeon::SymbolID::TokenFloatId);
+    lexer.m_symbol_id[log_surgeon::cTokenFirstTimestamp]
+            = static_cast<int>(log_surgeon::SymbolID::TokenFirstTimestampId);
+    lexer.m_symbol_id[log_surgeon::cTokenNewlineTimestamp]
+            = static_cast<int>(log_surgeon::SymbolID::TokenNewlineTimestampId);
+    // cTokenNewline is not added in schema_vars and can be explicitly added
+    // as '\n' to catch the end of non-timestamped log messages
+    lexer.m_symbol_id[log_surgeon::cTokenNewline]
+            = static_cast<int>(log_surgeon::SymbolID::TokenNewlineId);
+
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenEndID)] = log_surgeon::cTokenEnd;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID)]
+            = log_surgeon::cTokenUncaughtString;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenIntId)] = log_surgeon::cTokenInt;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenFloatId)]
+            = log_surgeon::cTokenFloat;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenFirstTimestampId)]
+            = log_surgeon::cTokenFirstTimestamp;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenNewlineTimestampId)]
+            = log_surgeon::cTokenNewlineTimestamp;
+    lexer.m_id_symbol[static_cast<int>(log_surgeon::SymbolID::TokenNewlineId)]
+            = log_surgeon::cTokenNewline;
+
+    lexer.add_rule(
+            lexer.m_symbol_id["newLine"],
+            std::move(std::make_unique<log_surgeon::finite_automata::RegexASTLiteral<
+                    log_surgeon::finite_automata::RegexNFAByteState>>(
+                    log_surgeon::finite_automata::RegexASTLiteral<
+                            log_surgeon::finite_automata::RegexNFAByteState>('\n')
+            ))
+    );
+
+    for (auto const& delimiters_ast : schema_ast->m_delimiters) {
+        auto* delimiters_ptr = dynamic_cast<log_surgeon::DelimiterStringAST*>(delimiters_ast.get());
+        if (delimiters_ptr != nullptr) {
+            lexer.add_delimiters(delimiters_ptr->m_delimiters);
+        }
+    }
+    vector<uint32_t> delimiters;
+    for (uint32_t i = 0; i < log_surgeon::cSizeOfByte; i++) {
+        if (lexer.is_delimiter(i)) {
+            delimiters.push_back(i);
+        }
+    }
+    for (std::unique_ptr<log_surgeon::ParserAST> const& parser_ast : schema_ast->m_schema_vars) {
+        auto* rule = dynamic_cast<log_surgeon::SchemaVarAST*>(parser_ast.get());
+
+        if ("timestamp" == rule->m_name) {
+            continue;
+        }
+
+        if (lexer.m_symbol_id.find(rule->m_name) == lexer.m_symbol_id.end()) {
+            lexer.m_symbol_id[rule->m_name] = lexer.m_symbol_id.size();
+            lexer.m_id_symbol[lexer.m_symbol_id[rule->m_name]] = rule->m_name;
+        }
+
+        // transform '.' from any-character into any non-delimiter character
+        rule->m_regex_ptr->remove_delimiters_from_wildcard(delimiters);
+
+        bool is_possible_input[log_surgeon::cUnicodeMax] = {false};
+        rule->m_regex_ptr->set_possible_inputs_to_true(is_possible_input);
+        bool contains_delimiter = false;
+        uint32_t delimiter_name;
+        for (uint32_t delimiter : delimiters) {
+            if (is_possible_input[delimiter]) {
+                contains_delimiter = true;
+                delimiter_name = delimiter;
+                break;
+            }
+        }
+
+        if (contains_delimiter) {
+            FileReader schema_reader;
+            ErrorCode error_code = schema_reader.try_open(schema_ast->m_file_path);
+            if (ErrorCode_Success != error_code) {
+                throw std::runtime_error(
+                        schema_file_path + ":" + std::to_string(rule->m_line_num + 1)
+                        + ": error: '" + rule->m_name
+                        + "' has regex pattern which contains delimiter '"
+                        + char(delimiter_name) + "'.\n"
+                );
+            } else {
+                // more detailed debugging based on looking at the file
+                string line;
+                for (uint32_t i = 0; i <= rule->m_line_num; i++) {
+                    schema_reader.read_to_delimiter('\n', false, false, line);
+                }
+                int colon_pos = 0;
+                for (char i : line) {
+                    colon_pos++;
+                    if (i == ':') {
+                        break;
+                    }
+                }
+                string indent(10, ' ');
+                string spaces(colon_pos, ' ');
+                string arrows(line.size() - colon_pos, '^');
+
+                throw std::runtime_error(
+                        schema_file_path + ":" + std::to_string(rule->m_line_num + 1)
+                        + ": error: '" + rule->m_name
+                        + "' has regex pattern which contains delimiter '"
+                        + char(delimiter_name) + "'.\n" + indent + line + "\n" + indent + spaces
+                        + arrows + "\n"
+                );
+            }
+        }
+        lexer.add_rule(lexer.m_symbol_id[rule->m_name], std::move(rule->m_regex_ptr));
+    }
+    if (reverse) {
+        lexer.generate_reverse();
+    } else {
+        lexer.generate();
+    }
+}
diff --git a/components/core/src/Utils.hpp b/components/core/src/Utils.hpp
index e3fa814a0..ea09f0ca7 100644
--- a/components/core/src/Utils.hpp
+++ b/components/core/src/Utils.hpp
@@ -8,6 +8,9 @@
 #include
 #include
 
+// Log surgeon
+#include <log_surgeon/Lexer.hpp>
+
 // Project headers
 #include "Defs.h"
 #include "ErrorCode.hpp"
@@ -65,4 +68,15 @@ std::string get_unambiguous_path (const std::string& path);
 */
 ErrorCode read_list_of_paths (const std::string& list_path, std::vector<std::string>& paths);
 
+/**
+ * Loads a lexer from a file
+ * @param schema_file_path
+ * @param done
+ * @param forward_lexer_ptr
+ */
+void load_lexer_from_file(
+        std::string const& schema_file_path,
+        bool done,
+        log_surgeon::lexers::ByteLexer& forward_lexer_ptr
+);
 #endif // UTILS_HPP
diff --git a/components/core/src/clg/clg.cpp b/components/core/src/clg/clg.cpp
index 16ab7b4df..7d8160ca0 100644
--- a/components/core/src/clg/clg.cpp
+++ b/components/core/src/clg/clg.cpp
@@ -8,19 +8,21 @@
 // spdlog
 #include
 
+// Log surgeon
+#include <log_surgeon/Lexer.hpp>
+
 // Project headers
 #include "../Defs.h"
-#include "../compressor_frontend/utils.hpp"
 #include "../Grep.hpp"
 #include "../GlobalMySQLMetadataDB.hpp"
 #include "../GlobalSQLiteMetadataDB.hpp"
 #include "../Profiler.hpp"
 #include "../spdlog_with_specializations.hpp"
 #include "../streaming_archive/Constants.hpp"
+#include "../Utils.hpp"
 #include "CommandLineArguments.hpp"
 
 using clg::CommandLineArguments;
-using compressor_frontend::load_lexer_from_file;
 using std::cout;
 using std::cerr;
 using std::endl;
@@ -135,8 +137,14 @@ static bool open_archive (const string& archive_path, Archive& archive_reader) {
     return true;
 }
 
-static bool search (const vector<string>& search_strings, CommandLineArguments& command_line_args, Archive& archive,
-                    compressor_frontend::lexers::ByteLexer& forward_lexer, compressor_frontend::lexers::ByteLexer& reverse_lexer, bool use_heuristic) {
+static bool search(
+        vector<string> const& search_strings,
+        CommandLineArguments& command_line_args,
+        Archive& archive,
+        log_surgeon::lexers::ByteLexer& forward_lexer,
+        log_surgeon::lexers::ByteLexer& reverse_lexer,
+        bool use_heuristic
+) {
     ErrorCode error_code;
     auto search_begin_ts = command_line_args.get_search_begin_ts();
     auto search_end_ts = command_line_args.get_search_end_ts();
@@ -148,9 +156,8 @@ static bool search (const vector<string>& search_strings, CommandLineArguments&
     bool is_superseding_query = false;
     for (const auto& search_string : search_strings) {
         Query query;
-        if (Grep::process_raw_query(archive, search_string, search_begin_ts, search_end_ts, command_line_args.ignore_case(), query, forward_lexer,
+        if (Grep::process_raw_query(archive, search_string, search_begin_ts, search_end_ts, command_line_args.ignore_case(), query, forward_lexer,
                                     reverse_lexer, use_heuristic)) {
-            //if (Grep::process_raw_query(archive, search_string, search_begin_ts, search_end_ts, command_line_args.ignore_case(), query, parser)) {
             no_queries_match = false;
 
             if (query.contains_sub_queries() == false) {
@@ -390,14 +397,15 @@ int main (int argc, const char* argv[]) {
     }
     global_metadata_db->open();
 
-    /// TODO: if performance is too slow, can make this more efficient by only diffing files with the same checksum
+    // TODO: if performance is too slow, can make this more efficient by only
+    // diffing files with the same checksum
     const uint32_t max_map_schema_length = 100000;
-    std::map<std::string, compressor_frontend::lexers::ByteLexer> forward_lexer_map;
-    std::map<std::string, compressor_frontend::lexers::ByteLexer> reverse_lexer_map;
-    compressor_frontend::lexers::ByteLexer one_time_use_forward_lexer;
-    compressor_frontend::lexers::ByteLexer one_time_use_reverse_lexer;
-    compressor_frontend::lexers::ByteLexer* forward_lexer_ptr;
-    compressor_frontend::lexers::ByteLexer* reverse_lexer_ptr;
+    std::map<std::string, log_surgeon::lexers::ByteLexer> forward_lexer_map;
+    std::map<std::string, log_surgeon::lexers::ByteLexer> reverse_lexer_map;
+    log_surgeon::lexers::ByteLexer one_time_use_forward_lexer;
+    log_surgeon::lexers::ByteLexer one_time_use_reverse_lexer;
+    log_surgeon::lexers::ByteLexer* forward_lexer_ptr;
+    log_surgeon::lexers::ByteLexer* reverse_lexer_ptr;
 
     string archive_id;
     Archive archive_reader;
@@ -416,7 +424,7 @@ int main (int argc, const char* argv[]) {
         if (!open_archive(archive_path.string(), archive_reader)) {
             return -1;
         }
-        
+
         // Generate lexer if schema file exists
         auto schema_file_path = archive_path / streaming_archive::cSchemaFileName;
         bool use_heuristic = true;
@@ -435,12 +443,14 @@ int main (int argc, const char* argv[]) {
             // if there is a chance there might be a difference make a new lexer as it's pretty fast to create
             if (forward_lexer_map_it == forward_lexer_map.end()) {
                 // Create forward lexer
-                auto insert_result = forward_lexer_map.emplace(buf, compressor_frontend::lexers::ByteLexer());
+                auto insert_result
+                        = forward_lexer_map.emplace(buf, log_surgeon::lexers::ByteLexer());
                 forward_lexer_ptr = &insert_result.first->second;
                 load_lexer_from_file(schema_file_path, false, *forward_lexer_ptr);
 
                 // Create reverse lexer
-                insert_result = reverse_lexer_map.emplace(buf, compressor_frontend::lexers::ByteLexer());
+                insert_result
+                        = reverse_lexer_map.emplace(buf, log_surgeon::lexers::ByteLexer());
                 reverse_lexer_ptr = &insert_result.first->second;
                 load_lexer_from_file(schema_file_path, true, *reverse_lexer_ptr);
             } else {
diff --git a/components/core/src/clo/clo.cpp b/components/core/src/clo/clo.cpp
index b71836858..1f5439a04 100644
--- a/components/core/src/clo/clo.cpp
+++ b/components/core/src/clo/clo.cpp
@@ -16,7 +16,6 @@
 // Project headers
 #include "../Defs.h"
-#include "../compressor_frontend/utils.hpp"
 #include "../Grep.hpp"
 #include "../Profiler.hpp"
 #include "../networking/socket_utils.hpp"
@@ -27,7 +26,6 @@
 #include "ControllerMonitoringThread.hpp"
 
 using clo::CommandLineArguments;
-using compressor_frontend::load_lexer_from_file;
 using std::cout;
 using std::cerr;
 using std::endl;
@@ -204,16 +202,16 @@ static bool search_archive (const CommandLineArguments& command_line_args, const
 
     // Load lexers from schema file if it exists
     auto schema_file_path = archive_path / streaming_archive::cSchemaFileName;
-    unique_ptr<compressor_frontend::lexers::ByteLexer> forward_lexer, reverse_lexer;
+    unique_ptr<log_surgeon::lexers::ByteLexer> forward_lexer, reverse_lexer;
     bool use_heuristic = true;
     if (boost::filesystem::exists(schema_file_path)) {
         use_heuristic = false;
         // Create forward lexer
-        forward_lexer.reset(new compressor_frontend::lexers::ByteLexer());
+        forward_lexer.reset(new log_surgeon::lexers::ByteLexer());
        load_lexer_from_file(schema_file_path.string(), false, *forward_lexer);
 
         // Create reverse lexer
-        reverse_lexer.reset(new compressor_frontend::lexers::ByteLexer());
+        reverse_lexer.reset(new log_surgeon::lexers::ByteLexer());
        load_lexer_from_file(schema_file_path.string(), true, *reverse_lexer);
     }
 
diff --git a/components/core/src/clp/FileCompressor.cpp b/components/core/src/clp/FileCompressor.cpp
index 9b5fe493a..1eb12af44 100644
--- a/components/core/src/clp/FileCompressor.cpp
+++ b/components/core/src/clp/FileCompressor.cpp
@@ -12,14 +12,22 @@
 // libarchive
 #include
 
+// Log surgeon
+#include <log_surgeon/LogEvent.hpp>
+#include <log_surgeon/ReaderParser.hpp>
+
 // Project headers
 #include "../ffi/ir_stream/decoding_methods.hpp"
 #include "../ir/utils.hpp"
+#include "../LogSurgeonReader.hpp"
 #include "../Profiler.hpp"
 #include "utils.hpp"
 
 using ir::has_ir_stream_magic_number;
 using ir::LogEventDeserializer;
+using log_surgeon::LogEventView;
+using log_surgeon::Reader;
+using log_surgeon::ReaderParser;
 using std::cout;
 using std::endl;
 using std::set;
@@ -123,9 +131,15 @@ namespace clp {
                         file_to_compress.get_path_for_compression(),
                         file_to_compress.get_group_id(), archive_writer, m_file_reader);
             } else {
-                parse_and_encode(target_data_size_of_dicts, archive_user_config, target_encoded_file_size,
-                                 file_to_compress.get_path_for_compression(),
-                                 file_to_compress.get_group_id(), archive_writer, m_file_reader);
+                parse_and_encode_with_library(
+                        target_data_size_of_dicts,
+                        archive_user_config,
+                        target_encoded_file_size,
+                        file_to_compress.get_path_for_compression(),
+                        file_to_compress.get_group_id(),
+                        archive_writer,
+                        m_file_reader
+                );
             }
         } else {
             if (false == try_compressing_as_archive(target_data_size_of_dicts, archive_user_config, target_encoded_file_size, file_to_compress,
@@ -144,10 +158,15 @@ namespace clp {
         return succeeded;
     }
 
-    void FileCompressor::parse_and_encode (size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config,
-                                           size_t target_encoded_file_size, const string& path_for_compression, group_id_t group_id,
-                                           streaming_archive::writer::Archive& archive_writer, ReaderInterface& reader)
-    {
+    void FileCompressor::parse_and_encode_with_library(
+            size_t target_data_size_of_dicts,
+            streaming_archive::writer::Archive::UserConfig& archive_user_config,
+            size_t target_encoded_file_size,
+            string const& path_for_compression,
+            group_id_t group_id,
+            streaming_archive::writer::Archive& archive_writer,
+            ReaderInterface& reader
+    ) {
         archive_writer.m_target_data_size_of_dicts = target_data_size_of_dicts;
         archive_writer.m_archive_user_config = archive_user_config;
         archive_writer.m_path_for_compression = path_for_compression;
@@ -155,26 +174,21 @@ namespace clp {
         archive_writer.m_target_encoded_file_size = target_encoded_file_size;
         // Open compressed file
         archive_writer.create_and_open_file(path_for_compression, group_id, m_uuid_generator(), 0);
-        m_log_parser->set_archive_writer_ptr(&archive_writer);
-        m_log_parser->get_archive_writer_ptr()->old_ts_pattern.clear();
-        try {
-            m_log_parser->parse(reader);
-        } catch (std::string const err) {
-            if (err.find("Lexer failed to find a match after checking entire buffer") != std::string::npos) {
-                close_file_and_append_to_segment(archive_writer);
-                SPDLOG_ERROR(err);
-            } else {
-                throw (err);
+        archive_writer.m_old_ts_pattern = nullptr;
+        LogSurgeonReader log_surgeon_reader(reader);
+        m_reader_parser->reset_and_set_reader(log_surgeon_reader);
+        while (false == m_reader_parser->done()) {
+            if (log_surgeon::ErrorCode err{m_reader_parser->parse_next_event()};
+                log_surgeon::ErrorCode::Success != err) {
+                SPDLOG_ERROR("Parsing Failed");
+                throw (std::runtime_error("Parsing Failed"));
             }
+            LogEventView const& log_view = m_reader_parser->get_log_parser().get_log_event_view();
+            archive_writer.write_msg_using_schema(log_view);
         }
-        // TODO: separate variables from static text
-        //Stopwatch close_file_watch("close_file_watch");
-        //close_file_watch.start();
         close_file_and_append_to_segment(archive_writer);
         // archive_writer_config needs to persist between files
         archive_user_config = archive_writer.m_archive_user_config;
-        //close_file_watch.stop();
-        //close_file_watch.print();
     }
 
     void FileCompressor::parse_and_encode_with_heuristic (size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config,
@@ -292,8 +306,15 @@ namespace clp {
                         boost_path_for_compression.string(), file_to_compress.get_group_id(),
                         archive_writer, m_libarchive_file_reader);
             } else {
-                parse_and_encode(target_data_size_of_dicts, archive_user_config, target_encoded_file_size, boost_path_for_compression.string(),
-                                 file_to_compress.get_group_id(), archive_writer, m_libarchive_file_reader);
+                parse_and_encode_with_library(
+                        target_data_size_of_dicts,
+                        archive_user_config,
+                        target_encoded_file_size,
+                        boost_path_for_compression.string(),
+                        file_to_compress.get_group_id(),
+                        archive_writer,
+                        m_libarchive_file_reader
+                );
             }
         } else if (has_ir_stream_magic_number({utf8_validation_buf, utf8_validation_buf_len})) {
             // Remove .clp suffix if found
diff --git a/components/core/src/clp/FileCompressor.hpp b/components/core/src/clp/FileCompressor.hpp
index 7d87e12db..f0346a616 100644
--- a/components/core/src/clp/FileCompressor.hpp
+++ b/components/core/src/clp/FileCompressor.hpp
@@ -7,9 +7,12 @@
 // Boost libraries
 #include
 
+// Log surgeon
+#include <log_surgeon/LogEvent.hpp>
+#include <log_surgeon/ReaderParser.hpp>
+
 // Project headers
 #include "../BufferedFileReader.hpp"
-#include "../compressor_frontend/LogParser.hpp"
 #include "../ir/LogEventDeserializer.hpp"
 #include "../LibarchiveFileReader.hpp"
 #include "../LibarchiveReader.hpp"
@@ -25,8 +28,12 @@ namespace clp {
     class FileCompressor {
     public:
         // Constructors
-        FileCompressor (boost::uuids::random_generator& uuid_generator, std::unique_ptr<compressor_frontend::LogParser> log_parser) : m_uuid_generator(
-                uuid_generator), m_log_parser(std::move(log_parser)) {}
+        FileCompressor(
+                boost::uuids::random_generator& uuid_generator,
+                std::unique_ptr<log_surgeon::ReaderParser> reader_parser
+        )
+                : m_uuid_generator(uuid_generator),
+                  m_reader_parser(std::move(reader_parser)) {}
 
         // Methods
         /**
@@ -55,9 +62,15 @@ namespace clp {
          * @param archive_writer
         * @param reader
         */
-        void parse_and_encode (size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config,
-                               size_t target_encoded_file_size, const std::string& path_for_compression, group_id_t group_id,
-                               streaming_archive::writer::Archive& archive_writer, ReaderInterface& reader);
+        void parse_and_encode_with_library(
+                size_t target_data_size_of_dicts,
+                streaming_archive::writer::Archive::UserConfig& archive_user_config,
+                size_t target_encoded_file_size,
+                std::string const& path_for_compression,
+                group_id_t group_id,
+                streaming_archive::writer::Archive& archive_writer,
+                ReaderInterface& reader
+        );
 
         void parse_and_encode_with_heuristic (size_t target_data_size_of_dicts, streaming_archive::writer::Archive::UserConfig& archive_user_config,
                                               size_t target_encoded_file_size, const std::string& path_for_compression, group_id_t group_id,
@@ -129,7 +142,7 @@ namespace clp {
         LibarchiveFileReader m_libarchive_file_reader;
         MessageParser m_message_parser;
         ParsedMessage m_parsed_message;
-        std::unique_ptr<compressor_frontend::LogParser> m_log_parser;
+        std::unique_ptr<log_surgeon::ReaderParser> m_reader_parser;
     };
 }
 
diff --git a/components/core/src/clp/compression.cpp b/components/core/src/clp/compression.cpp
index 8b1bf1c52..d82d0b4c8 100644
--- a/components/core/src/clp/compression.cpp
+++ b/components/core/src/clp/compression.cpp
@@ -51,9 +51,15 @@ namespace clp {
         return boost::filesystem::last_write_time(lhs.get_path()) < boost::filesystem::last_write_time(rhs.get_path());
     }
 
-    bool compress (CommandLineArguments& command_line_args, vector<FileToCompress>& files_to_compress, const vector<string>& empty_directory_paths,
-                   vector<StructuredFileToCompress>& grouped_files_to_compress, size_t target_encoded_file_size,
-                   std::unique_ptr<compressor_frontend::LogParser> log_parser, bool use_heuristic) {
+    bool compress(
+            CommandLineArguments& command_line_args,
+            vector<FileToCompress>& files_to_compress,
+            vector<string> const& empty_directory_paths,
+            vector<StructuredFileToCompress>& grouped_files_to_compress,
+            size_t target_encoded_file_size,
+            std::unique_ptr<log_surgeon::ReaderParser> reader_parser,
+            bool use_heuristic
+    ) {
         auto output_dir = boost::filesystem::path(command_line_args.get_output_dir());
 
         // Create output directory in case it doesn't exist
@@ -106,7 +112,7 @@ namespace clp {
         archive_writer.add_empty_directories(empty_directory_paths);
 
         bool all_files_compressed_successfully = true;
-        FileCompressor file_compressor(uuid_generator, std::move(log_parser));
+        FileCompressor file_compressor(uuid_generator, std::move(reader_parser));
         auto target_data_size_of_dictionaries = command_line_args.get_target_data_size_of_dictionaries();
 
         // Compress all files
diff --git a/components/core/src/clp/compression.hpp b/components/core/src/clp/compression.hpp
index 8291acb0b..a86aa1fca 100644
--- a/components/core/src/clp/compression.hpp
+++ b/components/core/src/clp/compression.hpp
@@ -8,11 +8,14 @@
 // Boost libraries
 #include
 
+// Log surgeon
+#include <log_surgeon/LogEvent.hpp>
+#include <log_surgeon/ReaderParser.hpp>
+
 // Project headers
 #include "CommandLineArguments.hpp"
 #include "FileToCompress.hpp"
 #include "StructuredFileToCompress.hpp"
-#include "../compressor_frontend/LogParser.hpp"
 
 namespace clp {
     /**
@@ -22,13 +25,19 @@ namespace clp {
     * @param empty_directory_paths
     * @param grouped_files_to_compress
     * @param target_encoded_file_size
-    * @param log_parser
+    * @param reader_parser
     * @param use_heuristic
     * @return true if compression was successful, false otherwise
     */
-    bool compress (CommandLineArguments& command_line_args, std::vector<FileToCompress>& files_to_compress,
-                   const std::vector<std::string>& empty_directory_paths, std::vector<StructuredFileToCompress>& grouped_files_to_compress,
-                   size_t target_encoded_file_size, std::unique_ptr<compressor_frontend::LogParser> log_parser, bool use_heuristic);
+    bool compress(
+            CommandLineArguments& command_line_args,
+            std::vector<FileToCompress>& files_to_compress,
+            std::vector<std::string> const& empty_directory_paths,
+            std::vector<StructuredFileToCompress>& grouped_files_to_compress,
+            size_t target_encoded_file_size,
+            std::unique_ptr<log_surgeon::ReaderParser> reader_parser,
+            bool use_heuristic
+    );
 
     /**
     * Reads a list of grouped files and a list of their IDs
diff --git a/components/core/src/clp/run.cpp b/components/core/src/clp/run.cpp
index ef9f90e0c..11786a753 100644
--- a/components/core/src/clp/run.cpp
+++ b/components/core/src/clp/run.cpp
@@ -6,8 +6,10 @@
 // spdlog
 #include
 
+// Log Surgeon
+#include <log_surgeon/ReaderParser.hpp>
+
 // Project headers
-#include "../compressor_frontend/LogParser.hpp"
 #include "../Profiler.hpp"
 #include "../spdlog_with_specializations.hpp"
 #include "../Utils.hpp"
@@ -60,10 +62,10 @@ namespace clp {
 
         if (CommandLineArguments::Command::Compress == command_line_args.get_command()) {
             /// TODO: make this not a unique_ptr and test performance difference
-            std::unique_ptr<compressor_frontend::LogParser> log_parser;
+            std::unique_ptr<log_surgeon::ReaderParser> reader_parser;
             if (!command_line_args.get_use_heuristic()) {
                 const std::string& schema_file_path = command_line_args.get_schema_file_path();
-                log_parser = std::make_unique<compressor_frontend::LogParser>(schema_file_path);
+                reader_parser = std::make_unique<log_surgeon::ReaderParser>(schema_file_path);
             }
 
             boost::filesystem::path path_prefix_to_remove(command_line_args.get_path_prefix_to_remove());
@@ -91,9 +93,15 @@ namespace clp {
 
             bool compression_successful;
             try {
-                compression_successful = compress(command_line_args, files_to_compress, empty_directory_paths, grouped_files_to_compress,
-                                                  command_line_args.get_target_encoded_file_size(), std::move(log_parser),
-                                                  command_line_args.get_use_heuristic());
+                compression_successful = compress(
+                        command_line_args,
+                        files_to_compress,
+                        empty_directory_paths,
+                        grouped_files_to_compress,
+                        command_line_args.get_target_encoded_file_size(),
+                        std::move(reader_parser),
+                        command_line_args.get_use_heuristic()
+                );
             } catch (TraceableException& e) {
                 ErrorCode error_code = e.get_error_code();
                 if (ErrorCode_errno == error_code) {
diff --git a/components/core/src/compressor_frontend/Constants.hpp b/components/core/src/compressor_frontend/Constants.hpp
deleted file mode 100644
index ed31f1ce5..000000000
--- a/components/core/src/compressor_frontend/Constants.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_CONSTANTS_HPP
-#define COMPRESSOR_FRONTEND_CONSTANTS_HPP
-
-#include
-
-namespace compressor_frontend {
-
-    typedef std::pair<uint32_t, uint32_t> Interval;
-
-    constexpr uint32_t cUnicodeMax = 0x10FFFF;
-    constexpr uint32_t cSizeOfByte = 256;
-    constexpr uint32_t cSizeOfAllChildren = 10000;
-    constexpr uint32_t cNullSymbol = 10000000;
-
-    enum class SymbolID {
-        TokenEndID,
-        TokenUncaughtStringID,
-        TokenIntId,
-        TokenFloatId,
-        TokenFirstTimestampId,
-        TokenNewlineTimestampId,
-        TokenNewlineId
-    };
-
-    constexpr char cTokenEnd[] = "$end";
-    constexpr char cTokenUncaughtString[] = "$UncaughtString";
-    constexpr char cTokenInt[] = "int";
-    constexpr char cTokenFloat[] = "float";
-    constexpr char cTokenFirstTimestamp[] = "firstTimestamp";
-    constexpr char cTokenNewlineTimestamp[] = "newLineTimestamp";
-    constexpr char cTokenNewline[] = "newLine";
-
-    constexpr uint32_t cStaticByteBuffSize = 60000;
-
-    namespace utf8 {
-        //0xC0, 0xC1, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF are invalid UTF-8 code units
-        static const uint32_t cError = 0xFE;
-        static const unsigned char cCharEOF = 0xFF;
-    };
-}
-
-#endif // COMPRESSOR_FRONTEND_CONSTANTS_HPP
diff --git a/components/core/src/compressor_frontend/LALR1Parser.cpp b/components/core/src/compressor_frontend/LALR1Parser.cpp
deleted file mode 100644
index 721b926d2..000000000
--- a/components/core/src/compressor_frontend/LALR1Parser.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "LALR1Parser.hpp"
-
-namespace compressor_frontend {
-    MatchedSymbol NonTerminal::m_all_children[cSizeOfAllChildren];
-
-    ParserAST::~ParserAST () = default;
-
-    uint32_t NonTerminal::m_next_children_start = 0;
-
-    NonTerminal::NonTerminal (Production* p) : m_production(p), m_ast(nullptr) {
-        m_children_start = NonTerminal::m_next_children_start;
-        NonTerminal::m_next_children_start += p->m_body.size();
-    }
-}
diff --git a/components/core/src/compressor_frontend/LALR1Parser.hpp b/components/core/src/compressor_frontend/LALR1Parser.hpp
deleted file mode 100644
index 9af75a2c6..000000000
--- a/components/core/src/compressor_frontend/LALR1Parser.hpp
+++ /dev/null
@@ -1,421 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_LALR1_PARSER_HPP
-#define COMPRESSOR_FRONTEND_LALR1_PARSER_HPP
-
-// C++ standard libraries
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-// Project headers
-#include "../ReaderInterface.hpp"
-#include "../type_utils.hpp"
-#include "Lexer.hpp"
-
-namespace streaming_archive::writer {
-    class File;
-
-    class Archive;
-}
-
-namespace compressor_frontend {
-
-    class ParserAST;
-
-    class NonTerminal;
-
-    template <typename T>
-    class ParserValue;
-
-    struct Production;
-    struct Item;
-    struct ItemSet;
-
-    typedef std::function<std::unique_ptr<ParserAST> (NonTerminal*)> SemanticRule;
-    typedef std::variant Action;
-
-    class ParserAST {
-    public:
-        // Constructor
-        virtual ~ParserAST () = 0;
-
-        template <typename T>
-        T& get () {
-            // TODO: why does this compile?
-            return static_cast<ParserValue<T>*>(this)->value;
-        }
-    };
-
-    template <typename T>
-    class ParserValue : public ParserAST {
-    public:
-        T value;
-
-        explicit ParserValue (T v) : value(std::move(v)) {}
-    };
-
-    typedef std::variant<Token, NonTerminal> MatchedSymbol;
-
-    class NonTerminal {
-    public:
-        // Constructor
-        NonTerminal () : m_production(nullptr), m_children_start(0), m_ast(nullptr) {}
-
-        // Constructor
-        explicit NonTerminal (Production*);
-
-        /**
-         * Return the ith child's (body of production) MatchedSymbol as a Token.
-         * Note: only children are needed (and stored) for performing semantic actions (for the AST)
-         * @param i
-         * @return Token*
-         */
-        [[nodiscard]] Token* token_cast (int i) const {
-            return &std::get<Token>(NonTerminal::m_all_children[m_children_start + i]);
-        }
-
-        /**
-         * Return the ith child's (body of production) MatchedSymbol as a NonTerminal.
-         * Note: only children are needed (and stored) for performing semantic actions (for the AST)
-         * @param i
-         * @return NonTerminal*
-         */
-        [[nodiscard]] NonTerminal* nonterminal_cast (int i) const {
-            return &std::get<NonTerminal>(NonTerminal::m_all_children[m_children_start + i]);
-        }
-
-        /**
-         * Return the AST that relates this nonterminal's children together (based on the production/syntax-rule that was determined to have generated them)
-         * @return std::unique_ptr<ParserAST>
-         */
-        std::unique_ptr<ParserAST>& getParserAST () {
-            return m_ast;
-        }
-
-        static MatchedSymbol m_all_children[];
-        static uint32_t m_next_children_start;
-        uint32_t m_children_start;
-        Production* m_production;
-        std::unique_ptr<ParserAST> m_ast;
-    };
-
-    /**
-     * Structure representing a production of the form "m_head -> {m_body}".
-     * The code fragment to execute upon reducing "{m_body} -> m_head" is m_semantic_rule, which is purely a function of the MatchedSymbols for {m_body}.
-     * m_index is the productions position in the parsers production vector.
- */ - struct Production { - public: - /** - * Returns if the production is an epsilon production. An epsilon production has nothing on its LHS (i.e., HEAD -> {}) - * @return bool - */ - [[nodiscard]] bool is_epsilon () const { - return this->m_body.empty(); - } - - uint32_t m_index; - uint32_t m_head; - std::vector m_body; - SemanticRule m_semantic_rule; - }; - - /** - * Structure representing an item in a LALR1 state. - * An item (1) is associated with a m_production and a single m_lookahead which is an input symbol (character) that can follow the m_production, - * and (2) tracks the current matching progress of its associated m_production, where everything exclusively to the left of m_dot is already matched. - */ - struct Item { - public: - // Constructor - Item () = default; - - // Constructor - Item (Production* p, uint32_t d, uint32_t t) : m_production(p), m_dot(d), m_lookahead(t) { - } - - /** - * Comparison operator for tie-breakers (not 100% sure where this is used) - * @param lhs - * @param rhs - * @return bool - */ - friend bool operator< (const Item& lhs, const Item& rhs) { - return std::tie(lhs.m_production->m_index, lhs.m_dot, lhs.m_lookahead) < - std::tie(rhs.m_production->m_index, rhs.m_dot, rhs.m_lookahead); - } - - /** - * Returns if the item has a dot at the end. This indicates the production associated with the item has already been fully matched. - * @return bool - */ - [[nodiscard]] bool has_dot_at_end () const { - return this->m_dot == this->m_production->m_body.size(); - } - - /** - * Returns the next unmatched symbol in the production based on the dot. - * @return uint32_t - */ - [[nodiscard]] uint32_t next_symbol () const { - return this->m_production->m_body.at(this->m_dot); - } - - Production* m_production; - uint32_t m_dot; - uint32_t m_lookahead; // for LR0 items, `m_lookahead` is unused - }; - - /** - * Structure representing an LALR1 state, a collection of items. - * The m_kernel is sufficient for fully representing the state, but m_closure is useful for computations. - * m_next indicates what state (ItemSet) to transition to based on the symbol received from the lexer - * m_actions is the action to perform based on the symbol received from the lexer. - */ - struct ItemSet { - public: - /** - * Comparison operator for tie-breakers (not 100% sure where this is used) - * @param lhs - * @param rhs - * @return bool - */ - friend bool operator< (const ItemSet& lhs, const ItemSet& rhs) { - return lhs.m_kernel < rhs.m_kernel; - } - - bool empty () const { - return m_kernel.empty(); - } - - uint32_t m_index = -1; - std::set m_kernel; - std::set m_closure; - std::unordered_map m_next; - std::vector m_actions; - }; - - /// TODO: make LALR1Parser an abstract class? 
- template - class LALR1Parser { - public: - // Constructor - LALR1Parser (); - - /// TODO: combine all the add_* into add_rule - /** - * Add a lexical rule to m_lexer - * @param name - * @param rule - */ - void add_rule (const std::string& name, std::unique_ptr> rule); - - /** - * Constructs a RegexASTLiteral and call add_rule - * @param name - * @param rule_char - */ - void add_token (const std::string& name, char rule_char); - - /** - * Calls add_rule with the given RegexASTGroup - * @param name - * @param rule_char - */ - void add_token_group (const std::string& name, std::unique_ptr> rule_group); - - /** - * Constructs a RegexASTCat and calls add_rule - * @param name - * @param chain - */ - void add_token_chain (const std::string& name, const std::string& chain); - - /** - * Adds productions (syntax rule) to the parser - * @param head - * @param body - * @param semantic_rule - * @return uint32_t - */ - uint32_t add_production (const std::string& head, const std::vector& body, SemanticRule semantic_rule); - - /** - * Generate the LALR1 parser (use after all the lexical rules and productions have been added) - */ - void generate (); - - /// TODO: add throws to function headers - /** - * Parse an input (e.g. file) - * @param reader - * @return Nonterminal - */ - NonTerminal parse (ReaderInterface& reader); - - void set_archive_writer_ptr (streaming_archive::writer::Archive* value) { - m_archive_writer_ptr = value; - } - - [[nodiscard]] streaming_archive::writer::Archive* get_archive_writer_ptr () const { - return m_archive_writer_ptr; - } - - protected: - /** - * Reset the parser to start a new parsing (set state to root, reset buffers, reset vars tracking positions) - * @param reader - */ - void reset (ReaderInterface& reader); - - /** - * Return an error string based on the current error state, matched_stack, and next_symbol in the parser - * @param reader - * @return std::string - */ - std::string report_error (ReaderInterface& reader); - - Lexer m_lexer; - streaming_archive::writer::Archive* m_archive_writer_ptr; - std::stack m_parse_stack_matches; - std::stack m_parse_stack_states; - ItemSet* root_itemset_ptr; - std::optional m_next_token; - std::vector> m_productions; - std::unordered_map, Production*>> m_productions_map; - std::unordered_map> m_nonterminals; - uint32_t m_root_production_id; - - private: - // Parser generation - - /** - * Generate LR0 kernels based on the productions in m_productions - */ - void generate_lr0_kernels (); - - /** - * Perform closure for the specified item_set based on its kernel - * @param item_set - */ - void generate_lr0_closure (ItemSet* item_set_ptr); - - /** - * Helper function for doing the closure on a specified item set - * @param item_set_ptr - * @param item - * @param next_symbol - * @return bool - */ - bool lr_closure_helper (ItemSet* item_set_ptr, Item const* item, uint32_t* next_symbol); - - /** - * Return the next state (ItemSet) based on the current state (ItemSet) and input symbol - * @return ItemSet* - */ - ItemSet* go_to (ItemSet*, const uint32_t&); - - /** - * Generate m_firsts, which specify for each symbol, all possible prefixes (I think?) 
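- * (i.e., the standard FIRST sets: for each symbol, the set of terminals that can begin a string derived from it.
- * For example, with A -> B 'a', B -> 'b', B -> {}, the fixed point gives FIRST(B) = {b} and FIRST(A) = {b, a}.)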
- */ - void generate_first_sets (); - - /** - * Generate kernels for LR1 item sets based on LR0 item sets - */ - void generate_lr1_itemsets (); - - /** - * Generate closure for a specified LR1 item set - * @param item_set_ptr - */ - void generate_lr1_closure (ItemSet* item_set_ptr); - - /** - * Generating parsing table and goto table for LALR1 parser based on state-symbol pair - * generate_lalr1_goto() + generate_lalr1_action() - */ - void generate_lalr1_parsing_table (); - - /** - * Generating the goto table for LARL1 parser specifying which state (ItemSet) to transition to based on state-symbol pair - * Does nothing (its already done in an earlier step) - */ - void generate_lalr1_goto (); - - /** - * Generating the action table for LARL1 parser specifying which action to perform based on state-symbol pair - */ - void generate_lalr1_action (); - - // Parser utilization - - /** - * Use the previous symbol from the lexer if unused, otherwise request the next symbol from the lexer - * @return Token - */ - Token get_next_symbol (); - - /** - * Tries all symbols in the language that the next token may be until the first non-error symbol is tried - * @param next_token - * @param accept - * @return bool - */ - bool parse_advance (Token& next_token, bool* accept); - - /** - * Perform an action and state transition based on the current state (ItemSet) and the type_id (current symbol interpretation of the next_token) - * @param type_id - * @param next_token - * @param accept - * @return bool - */ - bool parse_symbol (uint32_t const& type_id, Token& next_token, bool* accept); - - // Error handling - - /** - * Get the current line up to the error symbol - * @param parse_stack_matches - * @return std::string - */ - static std::string get_input_after_last_newline (std::stack& parse_stack_matches); - - /** - * Get the current line after the error symbol - * @param reader - * @param error_token - * @return std::string - */ - std::string get_input_until_next_newline (ReaderInterface& reader, Token* error_token); - - bool symbol_is_token (uint32_t s) { - return m_terminals.find(s) != m_terminals.end(); - } - - // Variables - std::set m_terminals; - std::set m_nullable; - std::map, std::unique_ptr> m_lr0_itemsets; - std::map, std::unique_ptr> m_lr1_itemsets; - std::unordered_map> m_firsts; - std::unordered_map> m_spontaneous_map; - std::map> m_propagate_map; - std::unordered_map> m_go_to_table; - }; -} - -#include "LALR1Parser.inc" - -#endif // COMPRESSOR_FRONTEND_LALR1_PARSER_HPP diff --git a/components/core/src/compressor_frontend/LALR1Parser.inc b/components/core/src/compressor_frontend/LALR1Parser.inc deleted file mode 100644 index 3e82883a3..000000000 --- a/components/core/src/compressor_frontend/LALR1Parser.inc +++ /dev/null @@ -1,689 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_LALR1_PARSER_TPP -#define COMPRESSOR_FRONTEND_LALR1_PARSER_TPP - -#include "LALR1Parser.hpp" - -// C++ standard libraries -#include -#include - -// Boost libraries -#include - -// Project headers -#include "../FileReader.hpp" -#include "../streaming_archive/writer/Archive.hpp" - -using compressor_frontend::finite_automata::RegexAST; -using compressor_frontend::finite_automata::RegexASTCat; -using compressor_frontend::finite_automata::RegexASTGroup; -using compressor_frontend::finite_automata::RegexASTInteger; -using compressor_frontend::finite_automata::RegexASTLiteral; -using compressor_frontend::finite_automata::RegexASTMultiplication; -using compressor_frontend::finite_automata::RegexASTOr; -using std::cout; -using std::deque; 
-using std::holds_alternative; -using std::make_unique; -using std::map; -using std::pair; -using std::set; -using std::string; -using std::unique_ptr; -using std::vector; - -namespace compressor_frontend { - template - LALR1Parser::LALR1Parser () : m_archive_writer_ptr(nullptr), root_itemset_ptr(nullptr), m_root_production_id(0) { - m_lexer.m_symbol_id[cTokenEnd] = (int) SymbolID::TokenEndID; - m_lexer.m_symbol_id[cTokenUncaughtString] = (int) SymbolID::TokenUncaughtStringID; - m_lexer.m_symbol_id[cTokenInt] = (int) SymbolID::TokenIntId; - m_lexer.m_symbol_id[cTokenFloat] = (int) SymbolID::TokenFloatId; - m_lexer.m_symbol_id[cTokenFirstTimestamp] = (int) SymbolID::TokenFirstTimestampId; - m_lexer.m_symbol_id[cTokenNewlineTimestamp] = (int) SymbolID::TokenNewlineTimestampId; - m_lexer.m_symbol_id[cTokenNewline] = (int) SymbolID::TokenNewlineId; - - m_lexer.m_id_symbol[(int) SymbolID::TokenEndID] = cTokenEnd; - m_lexer.m_id_symbol[(int) SymbolID::TokenUncaughtStringID] = cTokenUncaughtString; - m_lexer.m_id_symbol[(int) SymbolID::TokenIntId] = cTokenInt; - m_lexer.m_id_symbol[(int) SymbolID::TokenFloatId] = cTokenFloat; - m_lexer.m_id_symbol[(int) SymbolID::TokenFirstTimestampId] = cTokenFirstTimestamp; - m_lexer.m_id_symbol[(int) SymbolID::TokenNewlineTimestampId] = cTokenNewlineTimestamp; - m_lexer.m_id_symbol[(int) SymbolID::TokenNewlineId] = cTokenNewline; - - m_terminals.insert((int) SymbolID::TokenEndID); - m_terminals.insert((int) SymbolID::TokenUncaughtStringID); - m_terminals.insert((int) SymbolID::TokenIntId); - m_terminals.insert((int) SymbolID::TokenFloatId); - m_terminals.insert((int) SymbolID::TokenFirstTimestampId); - m_terminals.insert((int) SymbolID::TokenNewlineTimestampId); - m_terminals.insert((int) SymbolID::TokenNewlineId); - } - - - template - void LALR1Parser::add_rule (const string& name, unique_ptr> rule) { - if (m_lexer.m_symbol_id.find(name) == m_lexer.m_symbol_id.end()) { - m_lexer.m_symbol_id[name] = m_lexer.m_symbol_id.size(); - m_lexer.m_id_symbol[m_lexer.m_symbol_id[name]] = name; - - } - m_lexer.add_rule(m_lexer.m_symbol_id[name], std::move(rule)); - m_terminals.insert(m_lexer.m_symbol_id[name]); - } - - template - void LALR1Parser::add_token (const string& name, char rule_char) { - add_rule(name, make_unique>(RegexASTLiteral(rule_char))); - } - - template - void LALR1Parser::add_token_group (const string& name, unique_ptr> rule_group) { - add_rule(name, std::move(rule_group)); - } - - template - void LALR1Parser::add_token_chain (const string& name, const string& chain) { - assert(chain.size() > 1); - unique_ptr> first_char_rule = make_unique>(RegexASTLiteral(chain[0])); - unique_ptr> second_char_rule = make_unique>(RegexASTLiteral(chain[1])); - unique_ptr> rule_chain = make_unique>(std::move(first_char_rule), std::move(second_char_rule)); - for (uint32_t i = 2; i < chain.size(); i++) { - char next_char = chain[i]; - unique_ptr> next_char_rule = make_unique>(RegexASTLiteral(next_char)); - rule_chain = make_unique>(std::move(rule_chain), std::move(next_char_rule)); - } - add_rule(name, std::move(rule_chain)); - } - - template - uint32_t LALR1Parser::add_production (const string& head, const vector& body, SemanticRule semantic_rule) { - if (m_lexer.m_symbol_id.find(head) == m_lexer.m_symbol_id.end()) { - m_lexer.m_symbol_id[head] = m_lexer.m_symbol_id.size(); - m_lexer.m_id_symbol[m_lexer.m_symbol_id[head]] = head; - } - uint32_t n = m_productions.size(); - auto it = m_productions_map.find(head); - if (it != m_productions_map.end()) { - map, 
Production*>::iterator it2; - it2 = it->second.find(body); - if (it2 != it->second.end()) { - it2->second->m_semantic_rule = semantic_rule; - return n; - } - } - unique_ptr p(new Production); - p->m_index = n; - p->m_head = m_lexer.m_symbol_id[head]; - for (const string& symbol_string: body) { - if (m_lexer.m_symbol_id.find(symbol_string) == m_lexer.m_symbol_id.end()) { - m_lexer.m_symbol_id[symbol_string] = m_lexer.m_symbol_id.size(); - m_lexer.m_id_symbol[m_lexer.m_symbol_id[symbol_string]] = symbol_string; - } - p->m_body.push_back(m_lexer.m_symbol_id[symbol_string]); - } - p->m_semantic_rule = std::move(semantic_rule); - m_nonterminals.insert(pair>(p->m_head, {})); - m_nonterminals[p->m_head].push_back(p.get()); - m_productions_map[head][body] = p.get(); - m_productions.push_back(std::move(p)); - if (m_productions.size() == 1) { - m_root_production_id = add_production("$START_PRIME", {head}, nullptr); - } - return n; - } - - template - void LALR1Parser::generate () { - m_lexer.generate(); - assert(!m_productions.empty()); - generate_lr0_kernels(); - generate_first_sets(); - generate_lr1_itemsets(); - generate_lalr1_parsing_table(); - } - - template - void LALR1Parser::generate_lr0_kernels () { - Production* root_production_ptr = m_productions[m_root_production_id].get(); - Item root_item(root_production_ptr, 0, cNullSymbol); - unique_ptr item_set0 = make_unique(); - item_set0->m_kernel.insert(root_item); - deque unused_item_sets; - item_set0->m_index = m_lr0_itemsets.size(); - unused_item_sets.push_back(item_set0.get()); - m_lr0_itemsets[item_set0->m_kernel] = std::move(item_set0); - while (!unused_item_sets.empty()) { - ItemSet* item_set_ptr = unused_item_sets.back(); - unused_item_sets.pop_back(); - generate_lr0_closure(item_set_ptr); - for (const uint32_t& next_symbol: m_terminals) { - ItemSet* new_item_set_ptr = go_to(item_set_ptr, next_symbol); - if (new_item_set_ptr != nullptr) { - unused_item_sets.push_back(new_item_set_ptr); - } - } - for (map>::value_type const& kv: m_nonterminals) { - uint32_t next_symbol = kv.first; - ItemSet* new_item_set_ptr = go_to(item_set_ptr, next_symbol); - if (new_item_set_ptr != nullptr) { - unused_item_sets.push_back(new_item_set_ptr); - } - } - } - } - - template - bool LALR1Parser::lr_closure_helper (ItemSet* item_set_ptr, const Item* item, uint32_t* next_symbol) { - if (!item_set_ptr->m_closure.insert(*item).second) { // add {S'->(dot)S, ""} - return true; - } - if (item->has_dot_at_end()) { - return true; - } - *next_symbol = item->next_symbol(); - if (this->symbol_is_token(*next_symbol)) { // false - return true; - } - return false; - } - - template - void LALR1Parser::generate_lr0_closure (ItemSet* item_set_ptr) { - deque q(item_set_ptr->m_kernel.begin(), item_set_ptr->m_kernel.end()); // {{S'->(dot)S, ""}} - while (!q.empty()) { - Item item = q.back(); // {S'->(dot)S, ""} - q.pop_back(); - uint32_t next_symbol; - if (lr_closure_helper(item_set_ptr, &item, &next_symbol)) { - continue; - } - if (m_nonterminals.find(next_symbol) == m_nonterminals.end()) { - assert(false); - } - for (Production* const p: m_nonterminals.at(next_symbol)) { // S -> a - q.emplace_back(p, 0, cNullSymbol); // {S -> (dot) a, ""} - } - } - } - - template - ItemSet* LALR1Parser::go_to (ItemSet* from_item_set, const uint32_t& next_symbol) { - unique_ptr next_item_set_ptr = make_unique(); - assert(from_item_set != nullptr); - for (Item const& item: from_item_set->m_closure) { - if (item.has_dot_at_end()) { - continue; - } - if (item.next_symbol() == next_symbol) { - 
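- // Advance the dot past next_symbol: an item A -> alpha . X beta with X == next_symbol becomes A -> alpha X . beta in the kernel of GOTO(from_item_set, X).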
next_item_set_ptr->m_kernel.emplace(item.m_production, item.m_dot + 1, item.m_lookahead); - } - } - if (next_item_set_ptr->m_kernel.empty()) { - return nullptr; - } - if (m_lr0_itemsets.find(next_item_set_ptr->m_kernel) != m_lr0_itemsets.end()) { - ItemSet* existing_item_set_ptr = m_lr0_itemsets[next_item_set_ptr->m_kernel].get(); - m_go_to_table[from_item_set->m_index][next_symbol] = existing_item_set_ptr->m_index; - from_item_set->m_next[next_symbol] = existing_item_set_ptr; - } else { - next_item_set_ptr->m_index = m_lr0_itemsets.size(); - m_go_to_table[from_item_set->m_index][next_symbol] = next_item_set_ptr->m_index; - from_item_set->m_next[next_symbol] = next_item_set_ptr.get(); - m_lr0_itemsets[next_item_set_ptr->m_kernel] = std::move(next_item_set_ptr); - return from_item_set->m_next[next_symbol]; - } - return nullptr; - } - - template - void LALR1Parser::generate_first_sets () { - for (uint32_t const& s: m_terminals) { - m_firsts.insert(pair>(s, {s})); - } - bool changed = true; - while (changed) { - changed = false; - for (const unique_ptr& p: m_productions) { - set& f = m_firsts[p->m_head]; - if (p->is_epsilon()) { - changed = changed || m_nullable.insert(p->m_head).second; - continue; - } - size_t old = f.size(); - size_t i = 0; - for (uint32_t const& s: p->m_body) { - set& f2 = m_firsts[s]; - f.insert(f2.begin(), f2.end()); - if (m_nullable.find(s) == m_nullable.end()) { - break; - } - i++; - } - if (i == p->m_body.size()) { - changed = changed || m_nullable.insert(p->m_head).second; - } - changed = changed || (f.size() != old); - } - } - } - - template - void LALR1Parser::generate_lr1_itemsets () { - for (map, unique_ptr>::value_type const& kv: m_lr0_itemsets) { - for (Item const& l0_item: kv.second->m_kernel) { - ItemSet temp_item_set; - temp_item_set.m_kernel.insert(l0_item); - generate_lr1_closure(&temp_item_set); - for (Item const& l1_item: temp_item_set.m_closure) { - if (l1_item.m_lookahead != cNullSymbol) { - m_spontaneous_map[l1_item.m_production].insert(l1_item.m_lookahead); - } else { - if (l1_item.m_dot < l1_item.m_production->m_body.size()) { - Item temp_item(l1_item.m_production, l1_item.m_dot + 1, cNullSymbol); - m_propagate_map[l0_item].insert(temp_item); - } - } - } - } - } - map> lookaheads; - for (map, unique_ptr>::value_type const& kv: m_lr0_itemsets) { - for (Item const& l0_item: kv.second->m_kernel) { - lookaheads[l0_item].insert(m_spontaneous_map[l0_item.m_production].begin(), - m_spontaneous_map[l0_item.m_production].end()); - if (l0_item.m_production == m_productions[m_root_production_id].get()) { - lookaheads[l0_item].insert((int) SymbolID::TokenEndID); - } - } - } - bool changed = true; - while (changed) { - changed = false; - for (map>::value_type& kv: m_propagate_map) { - Item item_from = kv.first; - for (Item const& item_to: kv.second) { - size_t size_before = lookaheads[item_to].size(); - lookaheads[item_to].insert(lookaheads[item_from].begin(), lookaheads[item_from].end()); - size_t size_after = lookaheads[item_to].size(); - changed = changed || size_after > size_before; - } - } - } - for (map, unique_ptr>::value_type const& kv: m_lr0_itemsets) { - unique_ptr lr1_item_set_ptr = make_unique(); - for (Item const& l0_item: kv.second->m_kernel) { - for (int const& lookahead: lookaheads[l0_item]) { - Item lr1_item(l0_item.m_production, l0_item.m_dot, lookahead); - lr1_item_set_ptr->m_kernel.insert(lr1_item); - } - if (l0_item.m_production == m_productions[m_root_production_id].get() && l0_item.m_dot == 0) { - root_itemset_ptr = 
lr1_item_set_ptr.get(); - } - } - generate_lr1_closure(lr1_item_set_ptr.get()); - lr1_item_set_ptr->m_index = kv.second->m_index; - m_lr1_itemsets[lr1_item_set_ptr->m_kernel] = std::move(lr1_item_set_ptr); - } - // this seems like the wrong way to do this still: - for (map, unique_ptr>::value_type const& kv1: m_lr1_itemsets) { - for (map::value_type next_index: m_go_to_table[kv1.second->m_index]) { - bool success = false; - for (map, unique_ptr>::value_type const& kv2: m_lr1_itemsets) { - if (next_index.second == kv2.second->m_index) { - kv1.second->m_next[next_index.first] = kv2.second.get(); - success = true; - break; - } - } - assert(success); - } - } - } - - template - void LALR1Parser::generate_lr1_closure (ItemSet* item_set_ptr) { - deque queue(item_set_ptr->m_kernel.begin(), item_set_ptr->m_kernel.end()); - while (!queue.empty()) { - Item item = queue.back(); - queue.pop_back(); - uint32_t next_symbol; - if (lr_closure_helper(item_set_ptr, &item, &next_symbol)) { - continue; - } - vector lookaheads; - size_t pos = item.m_dot + 1; - while (pos < item.m_production->m_body.size()) { - uint32_t symbol = item.m_production->m_body.at(pos); - set symbol_firsts = m_firsts.find(symbol)->second; - lookaheads.insert(lookaheads.end(), std::make_move_iterator(symbol_firsts.begin()), - std::make_move_iterator(symbol_firsts.end())); - if (m_nullable.find(symbol) == m_nullable.end()) { - break; - } - pos++; - } - if (pos == item.m_production->m_body.size()) { - lookaheads.push_back(item.m_lookahead); - } - for (Production* const p: m_nonterminals.at(next_symbol)) { - for (uint32_t const& l: lookaheads) { - queue.emplace_back(p, 0, l); - } - } - } - } - - template - void LALR1Parser::generate_lalr1_parsing_table () { - generate_lalr1_goto(); - generate_lalr1_action(); - } - - template - void LALR1Parser::generate_lalr1_goto () { - // done already at end of generate_lr1_itemsets()? 
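- // (The goto transitions were already recorded in m_next and m_go_to_table while building the LR(1) item sets, so this pass is intentionally empty.)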
- } - - // Dragon book page 253 - template - void LALR1Parser::generate_lalr1_action () { - for (map, unique_ptr>::value_type const& kv: m_lr1_itemsets) { - ItemSet* item_set_ptr = kv.second.get(); - item_set_ptr->m_actions.resize(m_lexer.m_symbol_id.size(), false); - for (Item const& item: item_set_ptr->m_closure) { - if (!item.has_dot_at_end()) { - if (m_terminals.find(item.next_symbol()) == m_terminals.end() && - m_nonterminals.find(item.next_symbol()) == m_nonterminals.end()) { - continue; - } - assert(item_set_ptr->m_next.find(item.next_symbol()) != item_set_ptr->m_next.end()); - Action& action = item_set_ptr->m_actions[item.next_symbol()]; - if (!holds_alternative(action)) { - if (holds_alternative(action) && std::get(action) == item_set_ptr->m_next[item.next_symbol()]) { - continue; - } - cout << "Warning: For symbol " << m_lexer.m_id_symbol[item.next_symbol()] << ", adding shift to " - << item_set_ptr->m_next[item.next_symbol()]->m_index << " causes "; - if (holds_alternative(action)) { - cout << "shift-shift conflict with shift to " << std::get(action)->m_index << std::endl; - } else { - cout << "shift-reduce conflict with reduction " << m_lexer.m_id_symbol[std::get(action)->m_head] - << "-> {"; - for (uint32_t symbol: std::get(action)->m_body) { - cout << m_lexer.m_id_symbol[symbol] << ","; - } - cout << "}" << std::endl; - } - } - item_set_ptr->m_actions[item.next_symbol()] = item_set_ptr->m_next[item.next_symbol()]; - } - if (item.has_dot_at_end()) { - if (item.m_production == m_productions[m_root_production_id].get()) { - Action action = true; - item_set_ptr->m_actions[(int) SymbolID::TokenEndID] = action; - } else { - Action& action = item_set_ptr->m_actions[item.m_lookahead]; - if (!holds_alternative(action)) { - cout << "Warning: For symbol " << m_lexer.m_id_symbol[item.m_lookahead] - << ", adding reduction " << m_lexer.m_id_symbol[item.m_production->m_head] << "-> {"; - for (uint32_t symbol: item.m_production->m_body) { - cout << m_lexer.m_id_symbol[symbol] << ","; - } - cout << "} causes "; - if (holds_alternative(action)) { - cout << "shift-reduce conflict with shift to " << std::get(action)->m_index << std::endl; - } else { - cout << "reduce-reduce conflict with reduction " - << m_lexer.m_id_symbol[std::get(action)->m_head] - << "-> {"; - for (uint32_t symbol: std::get(action)->m_body) { - cout << m_lexer.m_id_symbol[symbol] << ","; - } - cout << "}" << std::endl; - } - } - item_set_ptr->m_actions[item.m_lookahead] = item.m_production; - } - } - } - } - } - - static uint32_t get_line_num (MatchedSymbol& top_symbol) { - uint32_t line_num = -1; - std::stack symbols; - symbols.push(std::move(top_symbol)); - while (line_num == -1) { - assert(!symbols.empty()); - MatchedSymbol& curr_symbol = symbols.top(); - std::visit(overloaded{ - [&line_num] (Token& token) { - line_num = token.m_line; - }, - [&symbols] (NonTerminal& m) { - for (int i = 0; i < m.m_production->m_body.size(); i++) { - symbols.push(std::move(NonTerminal::m_all_children[m.m_children_start + i])); - } - } - }, curr_symbol); - symbols.pop(); - } - return line_num; - } - - template - string LALR1Parser::get_input_after_last_newline (std::stack& parse_stack_matches) { - string error_message_reversed; - bool done = false; - while (!parse_stack_matches.empty() && !done) { - MatchedSymbol top_symbol = std::move(parse_stack_matches.top()); - parse_stack_matches.pop(); - std::visit(overloaded{ - [&error_message_reversed, &done] (Token& token) { - if (token.get_string() == "\r" || token.get_string() == "\n") { - done = 
true; - } else { - // input is being read backwards, so reverse each token so that when the entire input is reversed - // each token is displayed correctly - string token_string = token.get_string(); - std::reverse(token_string.begin(), token_string.end()); - error_message_reversed += token_string; - } - }, - [&parse_stack_matches] (NonTerminal& m) { - for (int i = 0; i < m.m_production->m_body.size(); i++) { - parse_stack_matches.push(std::move(NonTerminal::m_all_children[m.m_children_start + i])); - } - } - }, top_symbol); - } - std::reverse(error_message_reversed.begin(), error_message_reversed.end()); - return error_message_reversed; - } - - template - string LALR1Parser::get_input_until_next_newline (ReaderInterface& reader, Token* error_token) { - string rest_of_line; - bool next_is_end_token = (error_token->m_type_ids->at(0) == (int) SymbolID::TokenEndID); - bool next_has_newline = (error_token->get_string().find('\n') != string::npos) || (error_token->get_string().find('\r') != string::npos); - while (!next_has_newline && !next_is_end_token) { - Token token = get_next_symbol(); - next_has_newline = (token.get_string().find('\n') != string::npos) || (token.get_string().find('\r') != string::npos); - if (!next_has_newline) { - rest_of_line += token.get_string(); - next_is_end_token = (token.m_type_ids->at(0) == (int) SymbolID::TokenEndID); - } - } - rest_of_line += "\n"; - return rest_of_line; - } - - static string unescape (char const& c) { - switch (c) { - case '\t': - return "\\t"; - case '\r': - return "\\r"; - case '\n': - return "\\n"; - case '\v': - return "\\v"; - case '\f': - return "\\f"; - default: - return {c}; - } - } - - template - string LALR1Parser::report_error (ReaderInterface& reader) { - assert(m_next_token == std::nullopt); - assert(!m_parse_stack_matches.empty()); - MatchedSymbol top_symbol = std::move(m_parse_stack_matches.top()); - m_parse_stack_matches.pop(); - uint32_t line_num = get_line_num(top_symbol); - Token token = std::get(top_symbol); - string consumed_input = get_input_after_last_newline(m_parse_stack_matches); - string error_type = "unknown error"; - string error_indicator; - Token error_token = token; - string rest_of_line = get_input_until_next_newline(reader, &error_token); - for (uint32_t i = 0; i < consumed_input.size() + 10; i++) { - error_indicator += " "; - } - error_indicator += "^\n"; - if (token.m_type_ids->at(0) == (int) SymbolID::TokenEndID && consumed_input.empty()) { - error_type = "empty file"; - error_indicator = "^\n"; - } else { - error_type = "expected "; - for (uint32_t i = 0; i < m_parse_stack_states.top()->m_actions.size(); i++) { - Action action = m_parse_stack_states.top()->m_actions[i]; - if (action.index() != 0) { - error_type += "'"; - if (auto* regex_ast_literal = dynamic_cast*>(m_lexer.get_rule(i))) { - error_type += unescape(char(regex_ast_literal->get_character())); - } else { - error_type += m_lexer.m_id_symbol[i]; - } - error_type += "',"; - } - } - error_type.pop_back(); - error_type += " before '" + unescape(token.get_string()[0]) + "' token"; - } - string file_name = boost::filesystem::canonical((dynamic_cast(reader)).get_path()).string(); - string error_string = file_name + ":" + std::to_string(line_num + 1) + ":" - + std::to_string(consumed_input.size() + 1) + ": error: " + error_type + "\n"; - for (int i = 0; i < 10; i++) { - error_string += " "; - } - error_string += consumed_input + error_token.get_string() + rest_of_line + error_indicator; - return error_string; - } - - template - NonTerminal 
LALR1Parser::parse (ReaderInterface& reader) { - reset(reader); - m_parse_stack_states.push(root_itemset_ptr); - bool accept = false; - while (true) { - Token next_terminal = get_next_symbol(); - if (parse_advance(next_terminal, &accept)) { - break; - } - } - if (!accept) { - throw std::runtime_error(report_error(reader)); - } - assert(!m_parse_stack_matches.empty()); - MatchedSymbol m = std::move(m_parse_stack_matches.top()); - m_parse_stack_matches.pop(); - assert(m_parse_stack_matches.empty()); - return std::move(std::get(m)); - } - - template - void LALR1Parser::reset (ReaderInterface& reader) { - m_next_token = std::nullopt; - while (!m_parse_stack_states.empty()) { - m_parse_stack_states.pop(); - } - while (!m_parse_stack_matches.empty()) { - m_parse_stack_matches.pop(); - } - m_lexer.reset(reader); - } - - template - Token LALR1Parser::get_next_symbol () { - if (m_next_token == std::nullopt) { - Token token = m_lexer.scan(); - return token; - } - Token s = std::move(m_next_token.value()); - m_next_token = std::nullopt; - return s; - } - - template - bool LALR1Parser::parse_advance (Token& next_token, bool* accept) { - for (int const& type: *(next_token.m_type_ids)) { - if (parse_symbol(type, next_token, accept)) { - return (*accept); - } - } - assert(*accept == false); - // For error handling - m_parse_stack_matches.push(std::move(next_token)); - return true; - } - - template - bool LALR1Parser::parse_symbol (uint32_t const& type_id, Token& next_token, bool* accept) { - ItemSet* curr = m_parse_stack_states.top(); - Action& it = curr->m_actions[type_id]; - bool ret; - std::visit(overloaded{ - [&ret, &accept] (bool is_accepting) { - if (!is_accepting) { - ret = false; - return; - } - *accept = true; - ret = true; - return; - }, - [&ret, &next_token, this] (ItemSet* shift) { - m_parse_stack_states.push(shift); - m_parse_stack_matches.push(std::move(next_token)); - ret = true; - return; - }, - [&ret, &next_token, this] (Production* reduce) { - m_next_token = std::move(next_token); - NonTerminal matched_nonterminal(reduce); - size_t n = reduce->m_body.size(); - for (size_t i = 0; i < n; i++) { - m_parse_stack_states.pop(); - NonTerminal::m_all_children[matched_nonterminal.m_children_start + n - i - 1] = std::move(m_parse_stack_matches.top()); - m_parse_stack_matches.pop(); - } - if (reduce->m_semantic_rule != nullptr) { - m_lexer.set_reduce_pos(m_next_token->m_start_pos - 1); - matched_nonterminal.m_ast = reduce->m_semantic_rule(&matched_nonterminal); - } - ItemSet* curr = m_parse_stack_states.top(); - Action const& it = curr->m_actions[matched_nonterminal.m_production->m_head]; - m_parse_stack_states.push(std::get(it)); - m_parse_stack_matches.push(std::move(matched_nonterminal)); - ret = true; - return; - } - }, it); - return ret; - } -} - -#endif //COMPRESSOR_FRONTEND_LALR1_PARSER_TPP diff --git a/components/core/src/compressor_frontend/Lexer.hpp b/components/core/src/compressor_frontend/Lexer.hpp deleted file mode 100644 index 840fbdc22..000000000 --- a/components/core/src/compressor_frontend/Lexer.hpp +++ /dev/null @@ -1,199 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_LEXER_HPP -#define COMPRESSOR_FRONTEND_LEXER_HPP - -// C++ standard libraries -#include -#include -#include -#include -#include -#include -#include - -// Project headers -#include "../ReaderInterface.hpp" -#include "../Stopwatch.hpp" -#include "Constants.hpp" -#include "finite_automata/RegexAST.hpp" -#include "finite_automata/RegexDFA.hpp" -#include "finite_automata/RegexNFA.hpp" -#include "Token.hpp" - -using 
compressor_frontend::finite_automata::RegexAST; -using compressor_frontend::finite_automata::RegexNFA; -using compressor_frontend::finite_automata::RegexDFA; - -namespace compressor_frontend { - template - class Lexer { - public: - // std::vector can be declared as constexpr in c++20 - inline static const std::vector cTokenEndTypes = {(int) SymbolID::TokenEndID}; - inline static const std::vector cTokenUncaughtStringTypes = {(int) SymbolID::TokenUncaughtStringID}; - - /** - * A lexical rule has a name and regex pattern - */ - struct Rule { - // Constructor - Rule (int n, std::unique_ptr> r) : m_name(n), m_regex(std::move(r)) {} - - /** - * Adds AST representing the lexical rule to the NFA - * @param nfa - */ - void add_ast (RegexNFA* nfa) const; - - int m_name; - std::unique_ptr> m_regex; - }; - - // Constructor - Lexer () : m_byte_buf_pos(0), m_bytes_read(0), m_line(0), m_fail_pos(0), m_reduce_pos(0), m_match(false), m_match_pos(0), m_start_pos(0), - m_match_line(0), m_last_match_pos(0), m_last_match_line(0), m_type_ids(), m_is_delimiter(), m_is_first_char(), m_static_byte_buf(), - m_finished_reading_file(false), m_at_end_of_file(false), m_last_read_first_half_of_buf(false), m_reader(nullptr), m_has_delimiters(false), - m_active_byte_buf(nullptr), m_byte_buf_ptr(nullptr), m_byte_buf_size_ptr(nullptr), m_static_byte_buf_ptr(nullptr) { - for (bool& i: m_is_first_char) { - i = false; - } - } - - /** - * Add a delimiters line from the schema to the lexer - * @param delimiters - */ - void add_delimiters (const std::vector& delimiters); - - /** - * Add lexical rule to the lexer's list of rules - * @param id - * @param regex - */ - void add_rule (const uint32_t& id, std::unique_ptr> regex); - - /** - * Return regex patter for a rule name - * @param name - * @return RegexAST* - */ - RegexAST* get_rule (const uint32_t& name); - - /** - * Generate DFA for lexer - */ - void generate (); - - /** - * Generate DFA for a reverse lexer matching the reverse of the words in the original language - */ - void generate_reverse (); - - /** - * Reset the lexer to start a new lexing (reset buffers, reset vars tracking positions) - * @param reader - */ - void reset (ReaderInterface& reader); - - /** - * After lexing half of the buffer, reads into that half of the buffer and changes variables accordingly - * @param next_children_start - */ - void soft_reset (uint32_t& next_children_start); - - /** - * Gets next token from the input string - * If next token is an uncaught string, the next variable token is already prepped to be returned on the next call - * @return Token - */ - Token scan (); - - /** - * scan(), but with wild wildcards in the input string (for search) - * @param wildcard - * @return Token - */ - Token scan_with_wildcard (char wildcard); - - /** - * Sets the position of where the last reduce was performed, - * Used to know during lexing if half of the buffer has been lexed and needs to be read into - * @param value - */ - void set_reduce_pos (uint32_t value) { - m_reduce_pos = value; - } - - [[nodiscard]] const bool& get_has_delimiters() const { - return m_has_delimiters; - } - - [[nodiscard]] const bool& is_delimiter (uint8_t byte) const { - return m_is_delimiter[byte]; - } - - // First character of any variable in the schema - [[nodiscard]] const bool& is_first_char (uint8_t byte) const { - return m_is_first_char[byte]; - } - - std::map m_symbol_id; - std::map m_id_symbol; - - private: - /** - * Get next character from the input buffer - * @return unsigned char - */ - unsigned char 
get_next_character (); - - /** - * Return epsilon_closure over m_epsilon_transitions - * @return - */ - std::set epsilon_closure (NFAStateType* state_ptr); - - /** - * Generate a DFA from the NFA - * @param RegexNFA nfa - * @return std::unique_ptr> - */ - unique_ptr> nfa_to_dfa (RegexNFA& nfa); - - uint32_t m_fail_pos; - uint32_t m_reduce_pos; - uint32_t m_match_pos; - uint32_t m_start_pos; - uint32_t m_match_line; - uint32_t m_last_match_pos; - uint32_t m_last_match_line; - bool m_match; - const std::vector* m_type_ids; - static uint32_t m_current_buff_size; - bool m_is_delimiter[cSizeOfByte]; - bool m_is_first_char[cSizeOfByte]; - char* m_active_byte_buf; - char** m_byte_buf_ptr; - const uint32_t* m_byte_buf_size_ptr; - char* m_static_byte_buf_ptr; - char m_static_byte_buf[cStaticByteBuffSize]; - bool m_finished_reading_file; - bool m_at_end_of_file; - std::vector m_rules; - uint32_t m_byte_buf_pos; - bool m_last_read_first_half_of_buf; - size_t m_bytes_read; - uint32_t m_line; - ReaderInterface* m_reader; - bool m_has_delimiters; - unique_ptr> m_dfa; - }; - - namespace lexers { - using ByteLexer = Lexer; - using UTF8Lexer = Lexer; - }; -} - -#include "Lexer.inc" - -#endif // COMPRESSOR_FRONTEND_LEXER_HPP diff --git a/components/core/src/compressor_frontend/Lexer.inc b/components/core/src/compressor_frontend/Lexer.inc deleted file mode 100644 index 41b6ee7e9..000000000 --- a/components/core/src/compressor_frontend/Lexer.inc +++ /dev/null @@ -1,541 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_LEXER_TPP -#define COMPRESSOR_FRONTEND_LEXER_TPP - -#include "Lexer.hpp" - -// C++ standard libraries -#include -#include -#include - -// Project headers -#include "../FileReader.hpp" -#include "../spdlog_with_specializations.hpp" -#include "Constants.hpp" -#include "finite_automata/RegexAST.hpp" - -using std::string; -using std::to_string; - -/** - * utf8 format (https://en.wikipedia.org/wiki/UTF-8) - * 1 byte: 0x0 - 0x80 : 0xxxxxxx - * 2 byte: 0x80 - 0x7FF : 110xxxxx 10xxxxxx - * 3 byte: 0x800 - 0xFFFF : 1110xxxx 10xxxxxx 10xxxxxx - * 4 byte: 0x10000 - 0x1FFFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - */ -namespace compressor_frontend { - template - uint32_t Lexer::m_current_buff_size; - - template - void Lexer::soft_reset (uint32_t& next_children_start) { - if (next_children_start > cSizeOfAllChildren / 2) { - next_children_start = 0; - } - if (m_finished_reading_file) { - return; - } - if (m_reduce_pos == -1) { - m_reduce_pos += m_current_buff_size; - } - if ((!m_last_read_first_half_of_buf && m_reduce_pos > m_current_buff_size / 2) || - (m_last_read_first_half_of_buf && m_reduce_pos < m_current_buff_size / 2 && m_reduce_pos > 0)) { - uint32_t offset = 0; - if (m_last_read_first_half_of_buf) { - offset = m_current_buff_size / 2; - } - m_reader->read(m_active_byte_buf + offset, m_current_buff_size / 2, m_bytes_read); - - if (m_bytes_read < m_current_buff_size / 2) { - m_finished_reading_file = true; - } - m_last_read_first_half_of_buf = !m_last_read_first_half_of_buf; - m_bytes_read += offset; - if (m_reduce_pos >= m_current_buff_size / 2) { - m_fail_pos = m_current_buff_size / 2; - } else { - m_fail_pos = 0; - } - } - } - - template - unsigned char Lexer::get_next_character () { - if (m_finished_reading_file && m_byte_buf_pos == m_bytes_read) { - m_at_end_of_file = true; - return utf8::cCharEOF; - } - unsigned char character = m_active_byte_buf[m_byte_buf_pos]; - m_byte_buf_pos++; - if (m_byte_buf_pos == m_current_buff_size) { - m_byte_buf_pos = 0; - } - return character; - } - - template - Token 
Lexer::scan () { - if (m_match) { - m_match = false; - m_last_match_pos = m_match_pos; - m_last_match_line = m_match_line; - return Token{m_start_pos, m_match_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_match_line, m_type_ids}; - } - m_start_pos = m_byte_buf_pos; - m_match_pos = m_byte_buf_pos; - m_match_line = m_line; - m_type_ids = nullptr; - DFAStateType* state = m_dfa->get_root(); - while (true) { - if (m_byte_buf_pos == m_fail_pos) { - string warn = "Long line detected"; - warn += " at line " + to_string(m_line); - warn += " in file " + dynamic_cast(m_reader)->get_path(); - warn += " changing to dynamic buffer and increasing buffer size to "; - warn += to_string(m_current_buff_size * 2); - SPDLOG_WARN(warn); - // Found a super long line: for completeness handle this case, but efficiency doesn't matter - // 1. copy everything from old buffer into new buffer - if (m_active_byte_buf == m_static_byte_buf) { - m_active_byte_buf = (char*) malloc(m_current_buff_size * sizeof(char)); - if (m_fail_pos == 0) { - memcpy(m_active_byte_buf, m_static_byte_buf, sizeof(m_static_byte_buf)); - } else { - /// TODO: make a test case for this scenario - memcpy(m_active_byte_buf, m_static_byte_buf + sizeof(m_static_byte_buf) / 2, sizeof(m_static_byte_buf) / 2); - memcpy(m_active_byte_buf + sizeof(m_static_byte_buf) / 2, m_static_byte_buf, sizeof(m_static_byte_buf) / 2); - if (m_match_pos >= m_current_buff_size / 2) { - m_match_pos -= m_current_buff_size / 2; - } else { - m_match_pos += m_current_buff_size / 2; - } - if (m_start_pos >= m_current_buff_size / 2) { - m_start_pos -= m_current_buff_size / 2; - } else { - m_start_pos += m_current_buff_size / 2; - } - if (m_last_match_pos >= m_current_buff_size / 2) { - m_last_match_pos -= m_current_buff_size / 2; - } else { - m_last_match_pos += m_current_buff_size / 2; - } - } - } - m_current_buff_size *= 2; - m_active_byte_buf = (char*) realloc(m_active_byte_buf, m_current_buff_size * sizeof(char)); - m_byte_buf_ptr = &m_active_byte_buf; - m_byte_buf_size_ptr = &m_current_buff_size; - if (m_active_byte_buf == nullptr) { - SPDLOG_ERROR("failed to allocate byte buffer of size {}", m_current_buff_size); - string err = "Lexer failed to find a match after checking entire buffer"; - err += " at line " + to_string(m_line); - err += " in file " + dynamic_cast(m_reader)->get_path(); - dynamic_cast(m_reader)->close(); - throw (err); // this throw allows for continuation of compressing other files - } - m_reader->read(m_active_byte_buf + m_current_buff_size / 2, m_current_buff_size / 2, m_bytes_read); - m_bytes_read += m_current_buff_size / 2; - if (m_bytes_read < m_current_buff_size) { - m_finished_reading_file = true; - } - m_byte_buf_pos = m_current_buff_size / 2; - m_fail_pos = 0; - } - uint32_t prev_byte_buf_pos = m_byte_buf_pos; - unsigned char next_char = get_next_character(); - if ((m_is_delimiter[next_char] || m_at_end_of_file || !m_has_delimiters) && state->is_accepting()) { - m_match = true; - m_type_ids = &(state->get_tags()); - m_match_pos = prev_byte_buf_pos; - m_match_line = m_line; - } - DFAStateType* next = state->next(next_char); - if (next_char == '\n') { - m_line++; - if (m_has_delimiters && !m_match) { - next = m_dfa->get_root()->next(next_char); - m_match = true; - m_type_ids = &(next->get_tags()); - m_start_pos = prev_byte_buf_pos; - m_match_pos = m_byte_buf_pos; - m_match_line = m_line; - } - } - if (m_at_end_of_file || next == nullptr) { - if (m_match) { - m_at_end_of_file = false; - m_byte_buf_pos = m_match_pos; - m_line = m_match_line; - if 
(m_last_match_pos != m_start_pos) { - return Token{m_last_match_pos, m_start_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_last_match_line, &cTokenUncaughtStringTypes}; - } - m_match = false; - m_last_match_pos = m_match_pos; - m_last_match_line = m_match_line; - return Token{m_start_pos, m_match_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_match_line, m_type_ids}; - } else if (m_at_end_of_file && m_start_pos == m_byte_buf_pos) { - if (m_last_match_pos != m_start_pos) { - m_match_pos = m_byte_buf_pos; - m_type_ids = &cTokenEndTypes; - m_match = true; - return Token{m_last_match_pos, m_start_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_last_match_line, &cTokenUncaughtStringTypes}; - } - return Token{m_byte_buf_pos, m_byte_buf_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_line, &cTokenEndTypes}; - } else { - while (!m_at_end_of_file && !m_is_first_char[next_char]) { - prev_byte_buf_pos = m_byte_buf_pos; - next_char = get_next_character(); - } - m_byte_buf_pos = prev_byte_buf_pos; - m_start_pos = prev_byte_buf_pos; - state = m_dfa->get_root(); - continue; - } - } - state = next; - } - } - - /// TODO: this is duplicating almost all the code of scan() - template - Token Lexer::scan_with_wildcard (char wildcard) { - if (m_match) { - m_match = false; - m_last_match_pos = m_match_pos; - m_last_match_line = m_match_line; - return Token{m_start_pos, m_match_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_match_line, m_type_ids}; - } - m_start_pos = m_byte_buf_pos; - m_match_pos = m_byte_buf_pos; - m_match_line = m_line; - m_type_ids = nullptr; - DFAStateType* state = m_dfa->get_root(); - while (true) { - if (m_byte_buf_pos == m_fail_pos) { - string warn = "Long line detected"; - warn += " at line " + to_string(m_line); - warn += " in file " + dynamic_cast(m_reader)->get_path(); - warn += " changing to dynamic buffer and increasing buffer size to "; - warn += to_string(m_current_buff_size * 2); - SPDLOG_WARN(warn); - // Found a super long line: for completeness handle this case, but efficiency doesn't matter - // 1. 
copy everything from old buffer into new buffer - if (m_active_byte_buf == m_static_byte_buf) { - m_active_byte_buf = (char*) malloc(m_current_buff_size * sizeof(char)); - if (m_fail_pos == 0) { - memcpy(m_active_byte_buf, m_static_byte_buf, sizeof(m_static_byte_buf)); - } else { - /// TODO: make a test case for this scenario - memcpy(m_active_byte_buf, m_static_byte_buf + sizeof(m_static_byte_buf) / 2, sizeof(m_static_byte_buf) / 2); - memcpy(m_active_byte_buf + sizeof(m_static_byte_buf) / 2, m_static_byte_buf, sizeof(m_static_byte_buf) / 2); - if (m_match_pos >= m_current_buff_size / 2) { - m_match_pos -= m_current_buff_size / 2; - } else { - m_match_pos += m_current_buff_size / 2; - } - if (m_start_pos >= m_current_buff_size / 2) { - m_start_pos -= m_current_buff_size / 2; - } else { - m_start_pos += m_current_buff_size / 2; - } - if (m_last_match_pos >= m_current_buff_size / 2) { - m_last_match_pos -= m_current_buff_size / 2; - } else { - m_last_match_pos += m_current_buff_size / 2; - } - } - } - m_current_buff_size *= 2; - m_active_byte_buf = (char*) realloc(m_active_byte_buf, m_current_buff_size * sizeof(char)); - m_byte_buf_ptr = &m_active_byte_buf; - m_byte_buf_size_ptr = &m_current_buff_size; - if (m_active_byte_buf == nullptr) { - SPDLOG_ERROR("failed to allocate byte buffer of size {}", m_current_buff_size); - string err = "Lexer failed to find a match after checking entire buffer"; - err += " at line " + to_string(m_line); - err += " in file " + dynamic_cast(m_reader)->get_path(); - dynamic_cast(m_reader)->close(); - throw (err); // this throw allows for continuation of compressing other files - } - m_reader->read(m_active_byte_buf + m_current_buff_size / 2, m_current_buff_size / 2, m_bytes_read); - m_bytes_read += m_current_buff_size / 2; - if (m_bytes_read < m_current_buff_size) { - m_finished_reading_file = true; - } - m_byte_buf_pos = m_current_buff_size / 2; - m_fail_pos = 0; - } - uint32_t prev_byte_buf_pos = m_byte_buf_pos; - unsigned char next_char = get_next_character(); - if ((m_is_delimiter[next_char] || m_at_end_of_file || !m_has_delimiters) && state->is_accepting()) { - m_match = true; - m_type_ids = &(state->get_tags()); - m_match_pos = prev_byte_buf_pos; - m_match_line = m_line; - } - DFAStateType* next = state->next(next_char); - if (next_char == '\n') { - m_line++; - if (m_has_delimiters && !m_match) { - next = m_dfa->get_root()->next(next_char); - m_match = true; - m_type_ids = &(next->get_tags()); - m_start_pos = prev_byte_buf_pos; - m_match_pos = m_byte_buf_pos; - m_match_line = m_line; - } - } - - // !m_at_end_of_file should be impossible - // m_match_pos != m_byte_buf_pos --> "te matches from "tes*" (means "tes" isn't a match, so is_var = false) - // - if (m_at_end_of_file || next == nullptr) { - assert(m_at_end_of_file); - - if (!m_match || (m_match && m_match_pos != m_byte_buf_pos)) { - return Token{m_last_match_pos, m_byte_buf_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_last_match_line, &cTokenUncaughtStringTypes}; - } - if (m_match) { - // BFS (keep track of m_type_ids) - if (wildcard == '?') { - for (uint32_t byte = 0; byte < cSizeOfByte; byte++) { - DFAStateType* next_state = state->next(byte); - if (next_state->is_accepting() == false) { - return Token{m_last_match_pos, m_byte_buf_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_last_match_line, &cTokenUncaughtStringTypes}; - } - } - } else if (wildcard == '*') { - std::stack unvisited_states; - std::set visited_states; - unvisited_states.push(state); - while (!unvisited_states.empty()) { - DFAStateType* 
current_state = unvisited_states.top(); - if (current_state == nullptr || current_state->is_accepting() == false) { - return Token{m_last_match_pos, m_byte_buf_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_last_match_line, &cTokenUncaughtStringTypes}; - } - unvisited_states.pop(); - visited_states.insert(current_state); - for (uint32_t byte = 0; byte < cSizeOfByte; byte++) { - if (m_is_delimiter[byte]) { - continue; - } - DFAStateType* next_state = current_state->next(byte); - if (visited_states.find(next_state) == visited_states.end()) { - unvisited_states.push(next_state); - } - } - } - } - m_byte_buf_pos = m_match_pos; - m_line = m_match_line; - m_match = false; - m_last_match_pos = m_match_pos; - m_last_match_line = m_match_line; - return Token{m_start_pos, m_match_pos, m_byte_buf_ptr, m_byte_buf_size_ptr, m_match_line, m_type_ids}; - } - } - state = next; - } - } - - // If reset() is called all Tokens previously created by the lexer are invalid - template - void Lexer::reset (ReaderInterface& reader_interface) { - m_reader = &reader_interface; - m_finished_reading_file = false; - m_at_end_of_file = false; - m_reduce_pos = 0; - m_last_match_pos = 0; - m_match = false; - m_byte_buf_pos = 0; - m_line = 0; - m_bytes_read = 0; - m_last_read_first_half_of_buf = true; - if (m_active_byte_buf != nullptr && m_active_byte_buf != m_static_byte_buf) { - free(m_active_byte_buf); - } - m_static_byte_buf_ptr = m_static_byte_buf; - m_active_byte_buf = m_static_byte_buf; - m_current_buff_size = cStaticByteBuffSize; - m_byte_buf_ptr = &m_static_byte_buf_ptr; - m_byte_buf_size_ptr = &cStaticByteBuffSize; - - m_reader->read(m_active_byte_buf, m_current_buff_size / 2, m_bytes_read); - if (m_bytes_read < m_current_buff_size / 2) { - m_finished_reading_file = true; - } - m_fail_pos = m_current_buff_size / 2; - m_match_pos = 0; - m_start_pos = 0; - m_match_line = 0; - m_last_match_line = 0; - m_type_ids = nullptr; - } - - template - void Lexer::add_delimiters (const std::vector& delimiters) { - assert(!delimiters.empty()); - m_has_delimiters = true; - for (bool& i: m_is_delimiter) { - i = false; - } - for (uint32_t delimiter: delimiters) { - m_is_delimiter[delimiter] = true; - } - } - - template - void Lexer::add_rule (const uint32_t& id, std::unique_ptr> rule) { - m_rules.emplace_back(id, std::move(rule)); - } - - template - RegexAST* Lexer::get_rule (const uint32_t& name) { - for (Rule& rule: m_rules) { - if (rule.m_name == name) { - return rule.m_regex.get(); - } - } - return nullptr; - } - - template - void Lexer::generate () { - RegexNFA nfa; - for (const Rule& r: m_rules) { - r.add_ast(&nfa); - } - m_dfa = nfa_to_dfa(nfa); - - DFAStateType* state = m_dfa->get_root(); - for (uint32_t i = 0; i < cSizeOfByte; i++) { - if (state->next(i) != nullptr) { - m_is_first_char[i] = true; - } else { - m_is_first_char[i] = false; - } - } - } - - template - void Lexer::generate_reverse () { - RegexNFA nfa; - for (const Rule& r: m_rules) { - r.add_ast(&nfa); - } - - nfa.reverse(); - - m_dfa = nfa_to_dfa(nfa); - - DFAStateType* state = m_dfa->get_root(); - for (uint32_t i = 0; i < cSizeOfByte; i++) { - if (state->next(i) != nullptr) { - m_is_first_char[i] = true; - } else { - m_is_first_char[i] = false; - } - } - } - - template - void Lexer::Rule::add_ast (RegexNFA* nfa) const { - NFAStateType* s = nfa->new_state(); - s->set_accepting(true); - s->set_tag(m_name); - m_regex->add(nfa, s); - } - - template - std::set Lexer::epsilon_closure (NFAStateType* state_ptr) { - std::set closure_set; - std::stack stack; - 
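- // Iterative depth-first traversal: each state enters closure_set exactly once, and its epsilon-successors are pushed for processing; e.g., with s0 -e-> s1 -e-> s2, epsilon_closure(s0) = {s0, s1, s2}.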
stack.push(state_ptr); - while (!stack.empty()) { - NFAStateType* t = stack.top(); - stack.pop(); - if (closure_set.insert(t).second) { - for (NFAStateType* const u: t->get_epsilon_transitions()) { - stack.push(u); - } - } - } - return closure_set; - } - - template - unique_ptr> Lexer::nfa_to_dfa (RegexNFA& nfa) { - - typedef std::set StateSet; - unique_ptr> dfa(new RegexDFA); - - map dfa_states; - stack unmarked_sets; - - auto create_dfa_state = - [&dfa, &dfa_states, &unmarked_sets] (const StateSet& set) -> DFAStateType* { - DFAStateType* state = dfa->new_state(set); - dfa_states[set] = state; - unmarked_sets.push(set); - return state; - }; - - StateSet start_set = epsilon_closure(nfa.m_root); - create_dfa_state(start_set); - - while (!unmarked_sets.empty()) { - StateSet set = unmarked_sets.top(); - unmarked_sets.pop(); - DFAStateType* dfa_state = dfa_states.at(set); - - map ascii_transitions_map; - // map transitions_map; - - for (NFAStateType* s0: set) { - for (uint32_t i = 0; i < cSizeOfByte; i++) { - for (NFAStateType* const s1: s0->get_byte_transitions(i)) { - StateSet closure = epsilon_closure(s1); - ascii_transitions_map[i].insert(closure.begin(), closure.end()); - } - } - - /// TODO: add this for the utf8 case - //for (const typename NFAStateType::Tree::Data& data: s0->get_tree_transitions().all()) { - // for (NFAStateType* const s1: data.m_value) { - // StateSet closure = epsilon_closure(s1); - // transitions_map[data.m_interval].insert(closure.begin(), closure.end()); - // } - //} - - } - - auto next_dfa_state = - [&dfa_states, &create_dfa_state] (const StateSet& set) -> DFAStateType* { - DFAStateType* state; - auto it = dfa_states.find(set); - if (it == dfa_states.end()) { - state = create_dfa_state(set); - } else { - state = it->second; - } - return state; - }; - - for (const typename map::value_type& kv: ascii_transitions_map) { - DFAStateType* dest_state = next_dfa_state(kv.second); - dfa_state->add_byte_transition(kv.first, dest_state); - } - - /// TODO: add this for the utf8 case - //for (const typename map::value_type& kv: transitions_map) { - // DFAStateType* dest_state = next_dfa_state(kv.second); - // dfa_state->add_tree_transition(kv.first, dest_state); - //} - - } - return dfa; - } -} - -#endif // COMPRESSOR_FRONTEND_LEXER_TPP diff --git a/components/core/src/compressor_frontend/LogParser.cpp b/components/core/src/compressor_frontend/LogParser.cpp deleted file mode 100644 index e5ac766dd..000000000 --- a/components/core/src/compressor_frontend/LogParser.cpp +++ /dev/null @@ -1,218 +0,0 @@ -#include "LogParser.hpp" - -// C++ standard libraries -#include -#include - -// Project headers -#include "../clp/utils.hpp" -#include "../spdlog_with_specializations.hpp" -#include "Constants.hpp" -#include "SchemaParser.hpp" - -using compressor_frontend::finite_automata::RegexAST; -using compressor_frontend::finite_automata::RegexASTCat; -using compressor_frontend::finite_automata::RegexASTGroup; -using compressor_frontend::finite_automata::RegexASTInteger; -using compressor_frontend::finite_automata::RegexASTLiteral; -using compressor_frontend::finite_automata::RegexASTMultiplication; -using compressor_frontend::finite_automata::RegexASTOr; -using std::make_unique; -using std::runtime_error; -using std::string; -using std::to_string; -using std::unique_ptr; -using std::vector; - -namespace compressor_frontend { - LogParser::LogParser (const string& schema_file_path) { - m_active_uncompressed_msg = nullptr; - m_uncompressed_msg_size = 0; - - std::unique_ptr schema_ast = 
compressor_frontend::SchemaParser::try_schema_file(schema_file_path); - add_delimiters(schema_ast->m_delimiters); - add_rules(schema_ast); - m_lexer.generate(); - } - - void LogParser::add_delimiters (const unique_ptr& delimiters) { - auto delimiters_ptr = dynamic_cast(delimiters.get()); - if (delimiters_ptr != nullptr) { - m_lexer.add_delimiters(delimiters_ptr->m_delimiters); - } - } - - void LogParser::add_rules (const unique_ptr& schema_ast) { - // Currently, required to have delimiters (if schema_ast->delimiters != nullptr it is already enforced that at least 1 delimiter is specified) - if (schema_ast->m_delimiters == nullptr) { - throw runtime_error("When using --schema-path, \"delimiters:\" line must be used."); - } - vector& delimiters = dynamic_cast(schema_ast->m_delimiters.get())->m_delimiters; - add_token("newLine", '\n'); - for (unique_ptr const& parser_ast: schema_ast->m_schema_vars) { - auto rule = dynamic_cast(parser_ast.get()); - - // transform '.' from any-character into any non-delimiter character - rule->m_regex_ptr->remove_delimiters_from_wildcard(delimiters); - - if (rule->m_name == "timestamp") { - unique_ptr> first_timestamp_regex_ast(rule->m_regex_ptr->clone()); - add_rule("firstTimestamp", std::move(first_timestamp_regex_ast)); - unique_ptr> newline_timestamp_regex_ast(rule->m_regex_ptr->clone()); - unique_ptr> r2 = make_unique>('\n'); - add_rule("newLineTimestamp", make_unique>(std::move(r2), std::move(newline_timestamp_regex_ast))); - // prevent timestamps from going into the dictionary - continue; - } - // currently, error out if non-timestamp pattern contains a delimiter - // check if regex contains a delimiter - bool is_possible_input[cUnicodeMax] = {false}; - rule->m_regex_ptr->set_possible_inputs_to_true(is_possible_input); - bool contains_delimiter = false; - uint32_t delimiter_name; - for (uint32_t delimiter: delimiters) { - if (is_possible_input[delimiter]) { - contains_delimiter = true; - delimiter_name = delimiter; - break; - } - } - if (contains_delimiter) { - FileReader schema_reader; - ErrorCode error_code = schema_reader.try_open(schema_ast->m_file_path); - if (ErrorCode_Success != error_code) { - throw std::runtime_error(schema_ast->m_file_path + ":" + to_string(rule->m_line_num + 1) + ": error: '" + rule->m_name - + "' has regex pattern which contains delimiter '" + char(delimiter_name) + "'.\n"); - } else { - // more detailed debugging based on looking at the file - string line; - for (uint32_t i = 0; i <= rule->m_line_num; i++) { - schema_reader.read_to_delimiter('\n', false, false, line); - } - int colon_pos = 0; - for (char i : line) { - colon_pos++; - if (i == ':') { - break; - } - } - string indent(10, ' '); - string spaces(colon_pos, ' '); - string arrows(line.size() - colon_pos, '^'); - - throw std::runtime_error(schema_ast->m_file_path + ":" + to_string(rule->m_line_num + 1) + ": error: '" + rule->m_name - + "' has regex pattern which contains delimiter '" + char(delimiter_name) + "'.\n" - + indent + line + "\n" + indent + spaces + arrows + "\n"); - } - } - unique_ptr> delimiter_group = - make_unique>(RegexASTGroup(delimiters)); - rule->m_regex_ptr = make_unique>(std::move(delimiter_group), std::move(rule->m_regex_ptr)); - add_rule(rule->m_name, std::move(rule->m_regex_ptr)); - } - } - - - void LogParser::increment_uncompressed_msg_pos (ReaderInterface& reader) { - m_uncompressed_msg_pos++; - if (m_uncompressed_msg_pos == m_uncompressed_msg_size) { - string warn = "Very long line detected"; - warn += " changing to dynamic uncompressed_msg 
and increasing size to "; - warn += to_string(m_uncompressed_msg_size * 2); - SPDLOG_WARN("warn"); - if (m_active_uncompressed_msg == m_static_uncompressed_msg) { - m_active_uncompressed_msg = (Token*) malloc(m_uncompressed_msg_size * sizeof(Token)); - memcpy(m_active_uncompressed_msg, m_static_uncompressed_msg, sizeof(m_static_uncompressed_msg)); - } - m_uncompressed_msg_size *= 2; - m_active_uncompressed_msg = (Token*) realloc(m_active_uncompressed_msg, m_uncompressed_msg_size * sizeof(Token)); - if (m_active_uncompressed_msg == nullptr) { - SPDLOG_ERROR("failed to allocate uncompressed msg of size {}", m_uncompressed_msg_size); - string err = "Lexer failed to find a match after checking entire buffer"; - err += " in file " + dynamic_cast(reader).get_path(); - clp::close_file_and_append_to_segment(*m_archive_writer_ptr); - dynamic_cast(reader).close(); - throw (err); // error of this type will allow the program to continue running to compress other files - } - } - } - - void LogParser::parse (ReaderInterface& reader) { - m_uncompressed_msg_pos = 0; - if (m_active_uncompressed_msg != m_static_uncompressed_msg) { - free(m_active_uncompressed_msg); - } - m_uncompressed_msg_size = cStaticByteBuffSize; - m_active_uncompressed_msg = m_static_uncompressed_msg; - reset(reader); - m_parse_stack_states.push(root_itemset_ptr); - m_active_uncompressed_msg[0] = get_next_symbol(); - bool has_timestamp = false; - if (m_active_uncompressed_msg[0].m_type_ids->at(0) == (int) SymbolID::TokenEndID) { - return; - } - if (m_active_uncompressed_msg[0].m_type_ids->at(0) == (int) SymbolID::TokenFirstTimestampId) { - has_timestamp = true; - increment_uncompressed_msg_pos(reader); - } else { - has_timestamp = false; - m_archive_writer_ptr->change_ts_pattern(nullptr); - m_active_uncompressed_msg[1] = m_active_uncompressed_msg[0]; - m_uncompressed_msg_pos = 2; - } - while (true) { - m_active_uncompressed_msg[m_uncompressed_msg_pos] = get_next_symbol(); - int token_type = m_active_uncompressed_msg[m_uncompressed_msg_pos].m_type_ids->at(0); - if (token_type == (int) SymbolID::TokenEndID) { - m_archive_writer_ptr->write_msg_using_schema(m_active_uncompressed_msg, m_uncompressed_msg_pos, - m_lexer.get_has_delimiters(), has_timestamp); - break; - } - bool found_start_of_next_message = (has_timestamp && token_type == (int) SymbolID::TokenNewlineTimestampId) || - (!has_timestamp && m_active_uncompressed_msg[m_uncompressed_msg_pos].get_char(0) == '\n' && - token_type != (int) SymbolID::TokenNewlineId); - bool found_end_of_current_message = !has_timestamp && token_type == (int) SymbolID::TokenNewlineId; - if (found_end_of_current_message) { - m_lexer.set_reduce_pos(m_active_uncompressed_msg[m_uncompressed_msg_pos].m_end_pos); - increment_uncompressed_msg_pos(reader); - m_archive_writer_ptr->write_msg_using_schema(m_active_uncompressed_msg, m_uncompressed_msg_pos, - m_lexer.get_has_delimiters(), has_timestamp); - m_uncompressed_msg_pos = 0; - m_lexer.soft_reset(NonTerminal::m_next_children_start); - } - if (found_start_of_next_message) { - increment_uncompressed_msg_pos(reader); - m_active_uncompressed_msg[m_uncompressed_msg_pos] = m_active_uncompressed_msg[m_uncompressed_msg_pos - 1]; - if (m_active_uncompressed_msg[m_uncompressed_msg_pos].m_start_pos == *m_active_uncompressed_msg[m_uncompressed_msg_pos].m_buffer_size_ptr - 1) { - m_active_uncompressed_msg[m_uncompressed_msg_pos].m_start_pos = 0; - } else { - m_active_uncompressed_msg[m_uncompressed_msg_pos].m_start_pos++; - } - 
m_active_uncompressed_msg[m_uncompressed_msg_pos - 1].m_end_pos = - m_active_uncompressed_msg[m_uncompressed_msg_pos - 1].m_start_pos + 1; - m_active_uncompressed_msg[m_uncompressed_msg_pos - 1].m_type_ids = &Lexer::cTokenUncaughtStringTypes; - m_lexer.set_reduce_pos(m_active_uncompressed_msg[m_uncompressed_msg_pos].m_start_pos - 1); - m_archive_writer_ptr->write_msg_using_schema(m_active_uncompressed_msg, m_uncompressed_msg_pos, - m_lexer.get_has_delimiters(), has_timestamp); - // switch to timestamped messages if a timestamp is ever found at the start of line (potentially dangerous as it never switches back) - /// TODO: potentially switch back if a new line is reached and the message is too long (100x static message size) - if (token_type == (int) SymbolID::TokenNewlineTimestampId) { - has_timestamp = true; - } - if (has_timestamp) { - m_active_uncompressed_msg[0] = m_active_uncompressed_msg[m_uncompressed_msg_pos]; - m_uncompressed_msg_pos = 0; - } else { - m_active_uncompressed_msg[1] = m_active_uncompressed_msg[m_uncompressed_msg_pos]; - m_uncompressed_msg_pos = 1; - } - m_lexer.soft_reset(NonTerminal::m_next_children_start); - } - increment_uncompressed_msg_pos(reader); - } - } - - Token LogParser::get_next_symbol () { - return m_lexer.scan(); - } -} diff --git a/components/core/src/compressor_frontend/LogParser.hpp b/components/core/src/compressor_frontend/LogParser.hpp deleted file mode 100644 index f6c93e4b8..000000000 --- a/components/core/src/compressor_frontend/LogParser.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_LOGPARSER_HPP -#define COMPRESSOR_FRONTEND_LOGPARSER_HPP - -// C++ standard libraries -#include -#include - -// Boost libraries -#include - -// Project headers -#include "../Stopwatch.hpp" -#include "LALR1Parser.hpp" -#include "SchemaParser.hpp" - -namespace compressor_frontend { - - using finite_automata::RegexDFAByteState; - using finite_automata::RegexNFAByteState; - - /// TODO: try not inheriting from LALR1Parser (and compare c-array vs. 
vectors (its underlying array) for buffers afterwards) - class LogParser : public LALR1Parser { - public: - // Constructor - LogParser (const std::string& schema_file_path); - - /** - * /// TODO: this description will need to change after adding it directly into the dictionary writer - * Custom parsing for the log that builds up an uncompressed message and then compresses it all at once - * @param reader - */ - void parse (ReaderInterface& reader); - - /** - * Increment uncompressed message pos, considering swapping to a dynamic buffer (or doubling its size) when the current buffer size is reached - * @param reader - */ - void increment_uncompressed_msg_pos (ReaderInterface& reader); - - private: - /** - * Request the next symbol from the lexer - * @return Token - */ - Token get_next_symbol (); - - /** - * Add delimiters (originally from the schema AST from the user defined schema) to the log parser - * @param delimiters - */ - void add_delimiters (const std::unique_ptr& delimiters); - - /** - * Add log lexing rules (directly from the schema AST from the user defined schema) to the log lexer - * Add delimiters to the start of regex formats if delimiters are specified in user defined schema - * Timestamps aren't matched mid log message as a variable (as they can contain delimiters, which will break search) - * Variables other than timestamps cannot have delimiters - * @param schema_ast - */ - void add_rules (const std::unique_ptr& schema_ast); - - Token* m_active_uncompressed_msg; - uint32_t m_uncompressed_msg_size; - Token m_static_uncompressed_msg[cStaticByteBuffSize]; - uint32_t m_uncompressed_msg_pos = 0; - - }; -} - -#endif // COMPRESSOR_FRONTEND_LOGPARSER_HPP diff --git a/components/core/src/compressor_frontend/SchemaParser.cpp b/components/core/src/compressor_frontend/SchemaParser.cpp deleted file mode 100644 index 419ddee4e..000000000 --- a/components/core/src/compressor_frontend/SchemaParser.cpp +++ /dev/null @@ -1,463 +0,0 @@ -#include "SchemaParser.hpp" - -// C++ libraries -#include -#include - -// Project headers -#include "../FileReader.hpp" -#include "../spdlog_with_specializations.hpp" -#include "Constants.hpp" -#include "finite_automata/RegexAST.hpp" -#include "LALR1Parser.hpp" -#include "Lexer.hpp" - -using RegexASTByte = compressor_frontend::finite_automata::RegexAST; -using RegexASTGroupByte = compressor_frontend::finite_automata::RegexASTGroup; -using RegexASTIntegerByte = compressor_frontend::finite_automata::RegexASTInteger; -using RegexASTLiteralByte = compressor_frontend::finite_automata::RegexASTLiteral; -using RegexASTMultiplicationByte = compressor_frontend::finite_automata::RegexASTMultiplication; -using RegexASTOrByte = compressor_frontend::finite_automata::RegexASTOr; -using RegexASTCatByte = compressor_frontend::finite_automata::RegexASTCat; - - -using std::make_unique; -using std::string; -using std::unique_ptr; - -namespace compressor_frontend { - SchemaParser::SchemaParser () { - add_lexical_rules(); - add_productions(); - generate(); - } - - unique_ptr SchemaParser::generate_schema_ast (ReaderInterface& reader) { - NonTerminal nonterminal = parse(reader); - std::unique_ptr schema_file_ast(dynamic_cast(nonterminal.getParserAST().release())); - return std::move(schema_file_ast); - } - - unique_ptr SchemaParser::try_schema_file (const string& schema_file_path) { - FileReader schema_reader; - ErrorCode error_code = schema_reader.try_open(schema_file_path); - if (ErrorCode_Success != error_code) { - if (ErrorCode_FileNotFound == error_code) { - 
SPDLOG_ERROR("'{}' does not exist.", schema_file_path); - } else if (ErrorCode_errno == error_code) { - SPDLOG_ERROR("Failed to read '{}', errno={}", schema_file_path, errno); - } else { - SPDLOG_ERROR("Failed to read '{}', error_code={}", schema_file_path, error_code); - } - return nullptr; - } - SchemaParser sp; - unique_ptr schema_ast = sp.generate_schema_ast(schema_reader); - schema_reader.close(); - schema_ast->m_file_path = std::filesystem::canonical(schema_reader.get_path()).string(); - return schema_ast; - } - - static unique_ptr new_identifier_rule (NonTerminal* m) { - string r1 = m->token_cast(0)->get_string(); - return make_unique(IdentifierAST(r1[0])); - } - - static unique_ptr existing_identifier_rule (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - auto* r1_ptr = dynamic_cast(r1.get()); - string r2 = m->token_cast(1)->get_string(); - r1_ptr->add_character(r2[0]); - return std::move(r1); - } - - static unique_ptr schema_var_rule (NonTerminal* m) { - auto* r2 = dynamic_cast(m->nonterminal_cast(1)->getParserAST().get()); - Token* colon_token = m->token_cast(2); - auto& r4 = m->nonterminal_cast(3)->getParserAST()->get>(); - return make_unique(r2->m_name, std::move(r4), colon_token->m_line); - } - - static unique_ptr new_schema_file_rule (NonTerminal* m) { - return make_unique(); - } - - static unique_ptr new_schema_file_rule_with_var (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - unique_ptr schema_file_ast = make_unique(); - schema_file_ast->add_schema_var(std::move(r1)); - return std::move(schema_file_ast); - } - - - static unique_ptr new_schema_file_rule_with_delimiters (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(2)->getParserAST(); - unique_ptr schema_file_ast = make_unique(); - schema_file_ast->set_delimiters(std::move(r1)); - return std::move(schema_file_ast); - } - - static unique_ptr existing_schema_file_rule_with_delimiter (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - std::unique_ptr schema_file_ast(dynamic_cast(r1.release())); - unique_ptr& r5 = m->nonterminal_cast(4)->getParserAST(); - schema_file_ast->set_delimiters(std::move(r5)); - return std::move(schema_file_ast); - } - - unique_ptr SchemaParser::existing_schema_file_rule (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - std::unique_ptr schema_file_ast(dynamic_cast(r1.release())); - unique_ptr& r2 = m->nonterminal_cast(2)->getParserAST(); - schema_file_ast->add_schema_var(std::move(r2)); - m_lexer.soft_reset(NonTerminal::m_next_children_start); - return std::move(schema_file_ast); - } - - static unique_ptr identity_rule_ParserASTSchemaFile (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - std::unique_ptr schema_file_ast(dynamic_cast(r1.release())); - return std::move(schema_file_ast); - } - - typedef ParserValue> ParserValueRegex; - - static unique_ptr regex_identity_rule (NonTerminal* m) { - return unique_ptr( - new ParserValueRegex(std::move(m->nonterminal_cast(0)->getParserAST()->get>()))); - } - - static unique_ptr regex_cat_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTCatByte(std::move(r1), std::move(r2))))); - } - - static unique_ptr regex_or_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto& r2 = 
m->nonterminal_cast(2)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTOrByte(std::move(r1), std::move(r2))))); - } - - static unique_ptr regex_match_zero_or_more_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTMultiplicationByte(std::move(r1), 0, 0)))); - } - - static unique_ptr regex_match_one_or_more_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTMultiplicationByte(std::move(r1), 1, 0)))); - } - - static unique_ptr regex_match_exactly_rule (NonTerminal* m) { - auto& r3 = m->nonterminal_cast(2)->getParserAST()->get>(); - auto* r3_ptr = dynamic_cast(r3.get()); - uint32_t reps = 0; - uint32_t r3_size = r3_ptr->get_digits().size(); - for (uint32_t i = 0; i < r3_size; i++) { - reps += r3_ptr->get_digit(i) * (uint32_t) pow(10, r3_size - i - 1); - } - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTMultiplicationByte(std::move(r1), reps, reps)))); - } - - static unique_ptr regex_match_range_rule (NonTerminal* m) { - auto& r3 = m->nonterminal_cast(2)->getParserAST()->get>(); - auto* r3_ptr = dynamic_cast(r3.get()); - uint32_t min = 0; - uint32_t r3_size = r3_ptr->get_digits().size(); - for (uint32_t i = 0; i < r3_size; i++) { - min += r3_ptr->get_digit(i) * (uint32_t) pow(10, r3_size - i - 1); - } - auto& r5 = m->nonterminal_cast(4)->getParserAST()->get>(); - auto* r5_ptr = dynamic_cast(r5.get()); - uint32_t max = 0; - uint32_t r5_size = r5_ptr->get_digits().size(); - for (uint32_t i = 0; i < r5_size; i++) { - max += r5_ptr->get_digit(i) * (uint32_t) pow(10, r5_size - i - 1); - } - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTMultiplicationByte(std::move(r1), min, max)))); - } - - static unique_ptr regex_add_literal_existing_group_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - auto* r1_ptr = dynamic_cast(r1.get()); - auto* r2_ptr = dynamic_cast(r2.get()); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte(r1_ptr, r2_ptr)))); - } - - static unique_ptr regex_add_range_existing_group_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - auto* r1_ptr = dynamic_cast(r1.get()); - auto* r2_ptr = dynamic_cast(r2.get()); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte(r1_ptr, r2_ptr)))); - } - - static unique_ptr regex_add_literal_new_group_rule (NonTerminal* m) { - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - auto* r2_ptr = dynamic_cast(r2.get()); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte(r2_ptr)))); - } - - static unique_ptr regex_add_range_new_group_rule (NonTerminal* m) { - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - auto* r2_ptr = dynamic_cast(r2.get()); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte(r2_ptr)))); - } - - static unique_ptr regex_complement_incomplete_group_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(make_unique())); - } - - static unique_ptr regex_range_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto& 
r2 = m->nonterminal_cast(2)->getParserAST()->get>(); - auto* r1_ptr = dynamic_cast(r1.get()); - auto* r2_ptr = dynamic_cast(r2.get()); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte(r1_ptr, r2_ptr)))); - } - - static unique_ptr regex_middle_identity_rule (NonTerminal* m) { - return unique_ptr( - new ParserValueRegex(std::move(m->nonterminal_cast(1)->getParserAST()->get>()))); - } - - static unique_ptr regex_literal_rule (NonTerminal* m) { - Token* token = m->token_cast(0); - assert(token->get_string().size() == 1); - return unique_ptr(new ParserValueRegex(unique_ptr( - new RegexASTLiteralByte(token->get_string()[0])))); - } - - static unique_ptr regex_cancel_literal_rule (NonTerminal* m) { - Token* token = m->token_cast(1); - assert(token->get_string().size() == 1); - return unique_ptr(new ParserValueRegex(unique_ptr( - new RegexASTLiteralByte(token->get_string()[0])))); - } - - static unique_ptr regex_existing_integer_rule (NonTerminal* m) { - auto& r2 = m->nonterminal_cast(0)->getParserAST()->get>(); - auto* r2_ptr = dynamic_cast(r2.get()); - Token* token = m->token_cast(1); - assert(token->get_string().size() == 1); - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTIntegerByte(r2_ptr, token->get_string()[0])))); - } - - static unique_ptr regex_new_integer_rule (NonTerminal* m) { - Token* token = m->token_cast(0); - assert(token->get_string().size() == 1); - return unique_ptr(new ParserValueRegex(unique_ptr( - new RegexASTIntegerByte(token->get_string()[0])))); - } - - static unique_ptr regex_digit_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTGroupByte('0', '9')))); - } - - static unique_ptr regex_wildcard_rule (NonTerminal* m) { - unique_ptr regex_wildcard = make_unique(0, cUnicodeMax); - regex_wildcard->set_is_wildcard_true(); - return unique_ptr(new ParserValueRegex(std::move(regex_wildcard))); - } - - static unique_ptr regex_vertical_tab_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTLiteralByte('\v')))); - } - - static unique_ptr regex_form_feed_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTLiteralByte('\f')))); - } - - static unique_ptr regex_tab_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTLiteralByte('\t')))); - } - - static unique_ptr regex_char_return_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTLiteralByte('\r')))); - } - - static unique_ptr regex_newline_rule (NonTerminal* m) { - return unique_ptr(new ParserValueRegex(unique_ptr(new RegexASTLiteralByte('\n')))); - } - - static unique_ptr regex_white_space_rule (NonTerminal* m) { - unique_ptr regex_ast_group = make_unique(RegexASTGroupByte({' ', '\t', '\r', '\n', '\v', '\f'})); - return unique_ptr(new ParserValueRegex(unique_ptr(std::move(regex_ast_group)))); - } - - static unique_ptr existing_delimiter_string_rule (NonTerminal* m) { - unique_ptr& r1 = m->nonterminal_cast(0)->getParserAST(); - auto& r2 = m->nonterminal_cast(1)->getParserAST()->get>(); - auto* r1_ptr = dynamic_cast(r1.get()); - uint32_t character = dynamic_cast(r2.get())->get_character(); - r1_ptr->add_delimiter(character); - return std::move(r1); - } - - static unique_ptr new_delimiter_string_rule (NonTerminal* m) { - auto& r1 = m->nonterminal_cast(0)->getParserAST()->get>(); - uint32_t character = dynamic_cast(r1.get())->get_character(); - return make_unique(character); - } - - void 
SchemaParser::add_lexical_rules () { - add_token("Tab", '\t'); //9 - add_token("NewLine", '\n'); //10 - add_token("VerticalTab", '\v'); //11 - add_token("FormFeed", '\f'); //12 - add_token("CarriageReturn", '\r'); //13 - add_token("Space", ' '); - add_token("Bang", '!'); - add_token("Quotation", '"'); - add_token("Hash", '#'); - add_token("DollarSign", '$'); - add_token("Percent", '%'); - add_token("Ampersand", '&'); - add_token("Apostrophe", '\''); - add_token("Lparen", '('); - add_token("Rparen", ')'); - add_token("Star", '*'); - add_token("Plus", '+'); - add_token("Comma", ','); - add_token("Dash", '-'); - add_token("Dot", '.'); - add_token("ForwardSlash", '/'); - add_token_group("Numeric", make_unique('0', '9')); - add_token("Colon", ':'); - add_token("SemiColon", ';'); - add_token("LAngle", '<'); - add_token("Equal", '='); - add_token("RAngle", '>'); - add_token("QuestionMark", '?'); - add_token("At", '@'); - add_token_group("AlphaNumeric", make_unique('a', 'z')); - add_token_group("AlphaNumeric", make_unique('A', 'Z')); - add_token_group("AlphaNumeric", make_unique('0', '9')); - add_token("Lbracket", '['); - add_token("Backslash", '\\'); - add_token("Rbracket", ']'); - add_token("Hat", '^'); - add_token("Underscore", '_'); - add_token("Backtick", '`'); - add_token("Lbrace", '{'); - add_token("Vbar", '|'); - add_token("Rbrace", '}'); - add_token("Tilde", '~'); - add_token("d", 'd'); - add_token("s", 's'); - add_token("n", 'n'); - add_token("r", 'r'); - add_token("t", 't'); - add_token("f", 'f'); - add_token("v", 'v'); - add_token_chain("Delimiters", "delimiters"); - // default constructs to a m_negate group - unique_ptr comment_characters = make_unique(); - comment_characters->add_literal('\r'); - comment_characters->add_literal('\n'); - add_token_group("CommentCharacters", std::move(comment_characters)); - } - - void SchemaParser::add_productions () { - // add_production("SchemaFile", {}, new_schema_file_rule); - add_production("SchemaFile", {"Comment"}, new_schema_file_rule); - add_production("SchemaFile", {"SchemaVar"}, new_schema_file_rule_with_var); - add_production("SchemaFile", {"Delimiters", "Colon", "DelimiterString"}, new_schema_file_rule_with_delimiters); - add_production("SchemaFile", {"SchemaFile", "PortableNewLine"}, identity_rule_ParserASTSchemaFile); - add_production("SchemaFile", {"SchemaFile", "PortableNewLine", "Comment"}, identity_rule_ParserASTSchemaFile); - add_production("SchemaFile", {"SchemaFile", "PortableNewLine", "SchemaVar"}, - std::bind(&SchemaParser::existing_schema_file_rule, this, std::placeholders::_1)); - add_production("SchemaFile", {"SchemaFile", "PortableNewLine", "Delimiters", "Colon", "DelimiterString"}, existing_schema_file_rule_with_delimiter); - add_production("DelimiterString", {"DelimiterString", "Literal"}, existing_delimiter_string_rule); - add_production("DelimiterString", {"Literal"}, new_delimiter_string_rule); - add_production("PortableNewLine", {"CarriageReturn", "NewLine"}, nullptr); - add_production("PortableNewLine", {"NewLine"}, nullptr); - add_production("Comment", {"ForwardSlash", "ForwardSlash", "Text"}, nullptr); - add_production("Text", {"Text", "CommentCharacters"}, nullptr); - add_production("Text", {"CommentCharacters"}, nullptr); - add_production("Text", {"Text", "Delimiters"}, nullptr); - add_production("Text", {"Delimiters"}, nullptr); - add_production("SchemaVar", {"WhitespaceStar", "Identifier", "Colon", "Regex"}, schema_var_rule); - add_production("Identifier", {"Identifier", "AlphaNumeric"}, 
existing_identifier_rule); - add_production("Identifier", {"AlphaNumeric"}, new_identifier_rule); - add_production("WhitespaceStar", {"WhitespaceStar", "Space"}, nullptr); - add_production("WhitespaceStar", {}, nullptr); - add_production("Regex", {"Concat"}, regex_identity_rule); - add_production("Concat", {"Concat", "Or"}, regex_cat_rule); - add_production("Concat", {"Or"}, regex_identity_rule); - add_production("Or", {"Or", "Vbar", "Literal"}, regex_or_rule); - add_production("Or", {"MatchStar"}, regex_identity_rule); - add_production("Or", {"MatchPlus"}, regex_identity_rule); - add_production("Or", {"MatchExact"}, regex_identity_rule); - add_production("Or", {"MatchRange"}, regex_identity_rule); - add_production("Or", {"CompleteGroup"}, regex_identity_rule); - add_production("MatchStar", {"CompleteGroup", "Star"}, regex_match_zero_or_more_rule); - add_production("MatchPlus", {"CompleteGroup", "Plus"}, regex_match_one_or_more_rule); - add_production("MatchExact", {"CompleteGroup", "Lbrace", "Integer", "Rbrace"}, regex_match_exactly_rule); - add_production("MatchRange", {"CompleteGroup", "Lbrace", "Integer", "Comma", "Integer", "Rbrace"}, regex_match_range_rule); - add_production("CompleteGroup", {"IncompleteGroup", "Rbracket"}, regex_identity_rule); - add_production("CompleteGroup", {"Literal"}, regex_identity_rule); - add_production("CompleteGroup", {"Digit"}, regex_identity_rule); - add_production("CompleteGroup", {"Wildcard"}, regex_identity_rule); - add_production("CompleteGroup", {"WhiteSpace"}, regex_identity_rule); - add_production("IncompleteGroup", {"IncompleteGroup", "LiteralRange"}, regex_add_range_existing_group_rule); - add_production("IncompleteGroup", {"IncompleteGroup", "Digit"}, regex_add_range_existing_group_rule); - add_production("IncompleteGroup", {"IncompleteGroup", "Literal"}, regex_add_literal_existing_group_rule); - add_production("IncompleteGroup", {"IncompleteGroup", "WhiteSpace"}, regex_add_literal_existing_group_rule); - add_production("IncompleteGroup", {"Lbracket", "LiteralRange"}, regex_add_range_new_group_rule); - add_production("IncompleteGroup", {"Lbracket", "Digit"}, regex_add_range_new_group_rule); - add_production("IncompleteGroup", {"Lbracket", "Literal"}, regex_add_literal_new_group_rule); - add_production("IncompleteGroup", {"Lbracket", "WhiteSpace"}, regex_add_literal_new_group_rule); - add_production("IncompleteGroup", {"Lbracket", "Hat"}, regex_complement_incomplete_group_rule); - add_production("LiteralRange", {"Literal", "Dash", "Literal"}, regex_range_rule); - add_production("Literal", {"Backslash", "t"}, regex_tab_rule); - add_production("Literal", {"Backslash", "n"}, regex_newline_rule); - add_production("Literal", {"Backslash", "v"}, regex_vertical_tab_rule); - add_production("Literal", {"Backslash", "f"}, regex_form_feed_rule); - add_production("Literal", {"Backslash", "r"}, regex_char_return_rule); - add_production("Literal", {"Space"}, regex_literal_rule); - add_production("Literal", {"Bang"}, regex_literal_rule); - add_production("Literal", {"Quotation"}, regex_literal_rule); - add_production("Literal", {"Hash"}, regex_literal_rule); - add_production("Literal", {"DollarSign"}, regex_literal_rule); - add_production("Literal", {"Percent"}, regex_literal_rule); - add_production("Literal", {"Ampersand"}, regex_literal_rule); - add_production("Literal", {"Apostrophe"}, regex_literal_rule); - add_production("Literal", {"Backslash", "Lparen"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Rparen"}, 
regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Star"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Plus"}, regex_cancel_literal_rule); - add_production("Literal", {"Comma"}, regex_literal_rule); - add_production("Literal", {"Backslash", "Dash"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Dot"}, regex_cancel_literal_rule); - add_production("Literal", {"ForwardSlash"}, regex_literal_rule); - add_production("Literal", {"AlphaNumeric"}, regex_literal_rule); - add_production("Literal", {"Colon"}, regex_literal_rule); - add_production("Literal", {"SemiColon"}, regex_literal_rule); - add_production("Literal", {"LAngle"}, regex_literal_rule); - add_production("Literal", {"Equal"}, regex_literal_rule); - add_production("Literal", {"RAngle"}, regex_literal_rule); - add_production("Literal", {"QuestionMark"}, regex_literal_rule); - add_production("Literal", {"At"}, regex_literal_rule); - add_production("Literal", {"Backslash", "Lbracket"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Backslash"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Rbracket"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Hat"}, regex_cancel_literal_rule); - add_production("Literal", {"Underscore"}, regex_literal_rule); - add_production("Literal", {"Backtick"}, regex_literal_rule); - add_production("Literal", {"Backslash", "Lbrace"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Vbar"}, regex_cancel_literal_rule); - add_production("Literal", {"Backslash", "Rbrace"}, regex_cancel_literal_rule); - add_production("Literal", {"Tilde"}, regex_literal_rule); - add_production("Literal", {"Lparen", "Regex", "Rparen"}, regex_middle_identity_rule); - add_production("Integer", {"Integer", "Numeric"}, regex_existing_integer_rule); - add_production("Integer", {"Numeric"}, regex_new_integer_rule); - add_production("Digit", {"Backslash", "d"}, regex_digit_rule); - add_production("Wildcard", {"Dot"}, regex_wildcard_rule); - add_production("WhiteSpace", {"Backslash", "s"}, regex_white_space_rule); - } -} \ No newline at end of file diff --git a/components/core/src/compressor_frontend/SchemaParser.hpp b/components/core/src/compressor_frontend/SchemaParser.hpp deleted file mode 100644 index 10375d7f0..000000000 --- a/components/core/src/compressor_frontend/SchemaParser.hpp +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_SCHEMAPARSER_HPP -#define COMPRESSOR_FRONTEND_SCHEMAPARSER_HPP - -// Boost libraries -#include -#include - -// Project headers -#include "../ReaderInterface.hpp" -#include "LALR1Parser.hpp" - -namespace compressor_frontend { - - using finite_automata::RegexDFAByteState; - using finite_automata::RegexNFAByteState; - - // ASTs used in SchemaParser AST - class SchemaFileAST : public ParserAST { - public: - // Constructor - SchemaFileAST () = default; - - /// TODO: shouldn't this add delimiters instead of setting it? 
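One way the TODO above could be resolved is a hypothetical add_delimiters() that merges incoming delimiters into any existing set instead of overwriting it, as set_delimiters() below does. A self-contained sketch; the stand-in types mirror the classes in this header, and none of this is part of the patch:

    #include <cstdint>
    #include <memory>
    #include <vector>

    // Minimal stand-ins for ParserAST and DelimiterStringAST (see this header).
    struct ParserAST { virtual ~ParserAST () = default; };
    struct DelimiterStringAST : ParserAST {
        void add_delimiter (uint32_t delimiter) { m_delimiters.push_back(delimiter); }
        std::vector<uint32_t> m_delimiters;
    };

    // Hypothetical additive variant: keep the first delimiter set and fold any
    // later "delimiters:" lines into it rather than replacing it.
    struct SchemaFileAST {
        void add_delimiters (std::unique_ptr<ParserAST> delimiters_in) {
            if (nullptr == m_delimiters) {
                m_delimiters = std::move(delimiters_in);
                return;
            }
            auto* existing = dynamic_cast<DelimiterStringAST*>(m_delimiters.get());
            auto* incoming = dynamic_cast<DelimiterStringAST*>(delimiters_in.get());
            if (existing != nullptr && incoming != nullptr) {
                for (uint32_t delimiter : incoming->m_delimiters) {
                    existing->add_delimiter(delimiter);
                }
            }
        }
        std::unique_ptr<ParserAST> m_delimiters;
    };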
- void set_delimiters (std::unique_ptr delimiters_in) { - m_delimiters = std::move(delimiters_in); - } - - void add_schema_var (std::unique_ptr schema_var) { - m_schema_vars.push_back(std::move(schema_var)); - } - - std::vector> m_schema_vars; - std::unique_ptr m_delimiters; - std::string m_file_path; - }; - - class IdentifierAST : public ParserAST { - public: - // Constructor - explicit IdentifierAST (char character) { - m_name.push_back(character); - } - - void add_character (char character) { - m_name.push_back(character); - } - - std::string m_name; - }; - - class SchemaVarAST : public ParserAST { - public: - //Constructor - SchemaVarAST (std::string name, std::unique_ptr> regex_ptr, uint32_t line_num) : m_name(std::move(name)), - m_regex_ptr(std::move(regex_ptr)), - m_line_num(line_num) {} - - uint32_t m_line_num; - std::string m_name; - std::unique_ptr> m_regex_ptr; - }; - - class DelimiterStringAST : public ParserAST { - public: - // Constructor - explicit DelimiterStringAST (uint32_t delimiter) { - m_delimiters.push_back(delimiter); - } - - void add_delimiter (uint32_t delimiter) { - m_delimiters.push_back(delimiter); - } - - std::vector m_delimiters; - }; - - // Schema Parser itself - - class SchemaParser : public LALR1Parser { - public: - // Constructor - SchemaParser (); - - /** - * A semantic rule that needs access to soft_reset() - * @param m - * @return std::unique_ptr - */ - std::unique_ptr existing_schema_file_rule (NonTerminal* m); - - /** - * Parse a user defined schema to generate a schema AST used for generating the log lexer - * @param reader - * @return std::unique_ptr - */ - std::unique_ptr generate_schema_ast (ReaderInterface& reader); - - /** - * Wrapper around generate_schema_ast() - * @param schema_file_path - * @return std::unique_ptr - */ - static std::unique_ptr try_schema_file (const std::string& schema_file_path); - - private: - /** - * Add all lexical rules needed for schema lexing - */ - void add_lexical_rules (); - - /** - * Add all productions needed for schema parsing - */ - void add_productions (); - }; -} - -#endif // COMPRESSOR_FRONTEND_SCHEMAPARSER_HPP diff --git a/components/core/src/compressor_frontend/Token.cpp b/components/core/src/compressor_frontend/Token.cpp deleted file mode 100644 index 4c984d0af..000000000 --- a/components/core/src/compressor_frontend/Token.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "Token.hpp" - -using std::string; - -namespace compressor_frontend { - - string Token::get_string () const { - if (m_start_pos <= m_end_pos) { - return {*m_buffer_ptr + m_start_pos, *m_buffer_ptr + m_end_pos}; - } else { - return string(*m_buffer_ptr + m_start_pos, *m_buffer_ptr + *m_buffer_size_ptr) + - string(*m_buffer_ptr, *m_buffer_ptr + m_end_pos); - } - } - - char Token::get_char (uint8_t i) const { - return (*m_buffer_ptr)[m_start_pos + i]; - } - - string Token::get_delimiter () const { - return {*m_buffer_ptr + m_start_pos, *m_buffer_ptr + m_start_pos + 1}; - } - - uint32_t Token::get_length () const { - if (m_start_pos <= m_end_pos) { - return m_end_pos - m_start_pos; - } else { - return *m_buffer_size_ptr - m_start_pos + m_end_pos; - } - } -} \ No newline at end of file diff --git a/components/core/src/compressor_frontend/Token.hpp b/components/core/src/compressor_frontend/Token.hpp deleted file mode 100644 index d4db8396b..000000000 --- a/components/core/src/compressor_frontend/Token.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_TOKEN_HPP -#define COMPRESSOR_FRONTEND_TOKEN_HPP - -// C++ standard libraries 
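Token::get_string() above reads a token back out of the lexer's circular buffer, stitching the buffer's tail and head together when the token wraps around the end. The same logic as a standalone function (illustrative names, same arithmetic as the original):

    #include <cstdint>
    #include <string>

    // When start_pos <= end_pos the token is contiguous; otherwise it wraps, and
    // its text is [start_pos, buffer_size) followed by [0, end_pos).
    std::string extract_token (char const* buffer, uint32_t buffer_size,
                               uint32_t start_pos, uint32_t end_pos) {
        if (start_pos <= end_pos) {
            return {buffer + start_pos, buffer + end_pos};
        }
        return std::string(buffer + start_pos, buffer + buffer_size)
               + std::string(buffer, buffer + end_pos);
    }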
-#include -#include - -namespace compressor_frontend { - class Token { - public: - // Constructor - Token () : m_buffer_ptr(nullptr), m_buffer_size_ptr(nullptr), m_type_ids(nullptr), m_start_pos(0), m_end_pos(0), m_line(0) {} - - // Constructor - Token (uint32_t start_pos, uint32_t end_pos, char** buffer_ptr, const uint32_t* buffer_size_ptr, uint32_t line, const std::vector* type_ids) : - m_start_pos(start_pos), m_end_pos(end_pos), m_buffer_ptr(buffer_ptr), m_buffer_size_ptr(buffer_size_ptr), m_line(line), m_type_ids(type_ids) {} - - /** - * Return the token string (string in the input buffer that the token represents) - * @return std::string - */ - [[nodiscard]] std::string get_string () const; - - /** - * Return the first character (as a string) of the token string (which is a delimiter if delimiters are being used) - * @return std::string - */ - [[nodiscard]] std::string get_delimiter () const; - - /** - * Return the ith character of the token string - * @param i - * @return char - */ - [[nodiscard]] char get_char (uint8_t i) const; - - /** - * Get the length of the token string - * @return uint32_t - */ - [[nodiscard]] uint32_t get_length () const; - - uint32_t m_start_pos; - uint32_t m_end_pos; - char** m_buffer_ptr; - const uint32_t* m_buffer_size_ptr; - uint32_t m_line; - const std::vector* m_type_ids; - }; -} - -#endif // COMPRESSOR_FRONTEND_TOKEN_HPP \ No newline at end of file diff --git a/components/core/src/compressor_frontend/finite_automata/RegexAST.hpp b/components/core/src/compressor_frontend/finite_automata/RegexAST.hpp deleted file mode 100644 index f40796b3f..000000000 --- a/components/core/src/compressor_frontend/finite_automata/RegexAST.hpp +++ /dev/null @@ -1,449 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_HPP -#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_HPP - -// C++ standard libraries -#include -#include -#include -#include -#include -#include - -// Project headers -#include "../Constants.hpp" -#include "RegexNFA.hpp" -#include "UnicodeIntervalTree.hpp" - -namespace compressor_frontend::finite_automata { - - template - class RegexAST { - public: - // Destructor - virtual ~RegexAST () = default; - - /** - * Used for cloning a unique_pointer of base type RegexAST - * @return RegexAST* - */ - [[nodiscard]] virtual RegexAST* clone () const = 0; - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule - * @param is_possible_input - */ - virtual void set_possible_inputs_to_true (bool is_possible_input[]) const = 0; - - /** - * transform '.' 
from any-character into any non-delimiter in a lexer rule - * @param delimiters - */ - virtual void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) = 0; - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle the current node before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - virtual void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) = 0; - }; - - // Leaf node - template <typename NFAStateType> - class RegexASTLiteral : public RegexAST<NFAStateType> { - public: - // Constructor - explicit RegexASTLiteral (uint32_t character); - - /** - * Used for cloning a unique_ptr of type RegexASTLiteral - * @return RegexASTLiteral* - */ - [[nodiscard]] RegexASTLiteral<NFAStateType>* clone () const override { - return new RegexASTLiteral<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTLiteral at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - is_possible_input[m_character] = true; - } - - /** - * Transforms '.' to be any non-delimiter in a lexer rule, which does nothing as RegexASTLiteral is a leaf node that is not a RegexASTGroup - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - // DO NOTHING - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTLiteral before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - [[nodiscard]] const uint32_t& get_character () const { - return m_character; - } - - private: - uint32_t m_character; - - }; - - // Leaf node - template <typename NFAStateType> - class RegexASTInteger : public RegexAST<NFAStateType> { - public: - // Constructor - explicit RegexASTInteger (uint32_t digit); - - // Constructor - RegexASTInteger (RegexASTInteger<NFAStateType>* left, uint32_t digit); - - /** - * Used for cloning a unique_ptr of type RegexASTInteger - * @return RegexASTInteger* - */ - [[nodiscard]] RegexASTInteger<NFAStateType>* clone () const override { - return new RegexASTInteger<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTInteger at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - for (uint32_t i: m_digits) { - is_possible_input[i + '0'] = true; - } - } - - /** - * Transforms '.' to
be any non-delimiter in a lexer rule, which does nothing as RegexASTInteger is a leaf node that is not a RegexASTGroup - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - // DO NOTHING - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTInteger before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - [[nodiscard]] const std::vector<uint32_t>& get_digits () const { - return m_digits; - } - - [[nodiscard]] const uint32_t& get_digit (uint32_t i) const { - return m_digits[i]; - } - - private: - std::vector<uint32_t> m_digits; - }; - - // Leaf node - template <typename NFAStateType> - class RegexASTGroup : public RegexAST<NFAStateType> { - public: - - typedef std::pair<uint32_t, uint32_t> Range; - - // constructor - RegexASTGroup (); - - // constructor - RegexASTGroup (RegexASTGroup<NFAStateType>* left, RegexASTLiteral<NFAStateType>* right); - - // constructor - RegexASTGroup (RegexASTGroup<NFAStateType>* left, RegexASTGroup<NFAStateType>* right); - - // constructor - explicit RegexASTGroup (RegexASTLiteral<NFAStateType>* right); - - // constructor - explicit RegexASTGroup (RegexASTGroup<NFAStateType>* right); - - // constructor - RegexASTGroup (RegexASTLiteral<NFAStateType>* left, RegexASTLiteral<NFAStateType>* right); - - // constructor - RegexASTGroup (uint32_t min, uint32_t max); - - // constructor - explicit RegexASTGroup (const std::vector<uint32_t>& literals); - - /** - * Used for cloning a unique_ptr of type RegexASTGroup - * @return RegexASTGroup* - */ - [[nodiscard]] RegexASTGroup<NFAStateType>* clone () const override { - return new RegexASTGroup<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTGroup at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - if (!m_negate) { - for (Range range: m_ranges) { - for (uint32_t i = range.first; i <= range.second; i++) { - is_possible_input[i] = true; - } - } - } else { - std::vector<bool> inputs(cUnicodeMax, true); - for (Range range: m_ranges) { - for (uint32_t i = range.first; i <= range.second; i++) { - inputs[i] = false; - } - } - for (uint32_t i = 0; i < inputs.size(); i++) { - if (inputs[i]) { - is_possible_input[i] = true; - } - } - } - }
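The pow()-based loops in regex_match_exactly_rule() and regex_match_range_rule() earlier decode RegexASTInteger's digit vector into a repetition count; an equivalent integer-only fold, assuming the digits are stored most-significant first as above:

    #include <cstdint>
    #include <vector>

    // Float-free equivalent of the pow() loops above: shift the accumulated
    // value left one decimal place per digit, then add the digit.
    uint32_t digits_to_value (std::vector<uint32_t> const& digits) {
        uint32_t value = 0;
        for (uint32_t digit : digits) {
            value = value * 10 + digit;
        }
        return value;
    }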
- - /** - * Transforms '.' to be any non-delimiter in a lexer rule if this RegexASTGroup node contains `.` (is a wildcard group) - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - if (!m_is_wildcard) { - return; - } - if (delimiters.empty()) { - return; - } - m_ranges.clear(); - std::sort(delimiters.begin(), delimiters.end()); - if (delimiters[0] != 0) { - Range range(0, delimiters[0] - 1); - m_ranges.push_back(range); - } - for (uint32_t i = 1; i < delimiters.size(); i++) { - if (delimiters[i] - delimiters[i - 1] > 1) { - Range range(delimiters[i - 1] + 1, delimiters[i] - 1); - m_ranges.push_back(range); - } - } - if (delimiters.back() != cUnicodeMax) { - Range range(delimiters.back() + 1, cUnicodeMax); - m_ranges.push_back(range); - } - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTGroup before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - void add_range (uint32_t min, uint32_t max) { - m_ranges.emplace_back(min, max); - } - - void add_literal (uint32_t literal) { - m_ranges.emplace_back(literal, literal); - } - - void set_is_wildcard_true () { - m_is_wildcard = true; - } - - private: - /** - * Merges multiple ranges such that the resulting m_ranges is sorted and non-overlapping - * @param ranges - * @return std::vector<Range> - */ - static std::vector<Range> merge (const std::vector<Range>& ranges); - - /** - * Takes the complement (in the case of regex `^` at the start of a group) of multiple ranges such that m_ranges is sorted and non-overlapping - * @param ranges - * @return std::vector<Range> - */ - static std::vector<Range> complement (const std::vector<Range>& ranges); - - bool m_is_wildcard; - bool m_negate; - std::vector<Range> m_ranges; - - - };
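RegexASTGroup::remove_delimiters_from_wildcard() above is an interval complement over the delimiter set. The same computation as a standalone function; cUnicodeMax is assumed to be 0x10FFFF here, since Constants.hpp's value is not shown in this patch:

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    static constexpr uint32_t cUnicodeMax = 0x10FFFF;  // assumed value

    // Returns the sorted, non-overlapping ranges covering every code point that
    // is not a delimiter, mirroring the method above.
    std::vector<std::pair<uint32_t, uint32_t>>
    non_delimiter_ranges (std::vector<uint32_t> delimiters) {
        std::vector<std::pair<uint32_t, uint32_t>> ranges;
        if (delimiters.empty()) {
            ranges.emplace_back(0, cUnicodeMax);
            return ranges;
        }
        std::sort(delimiters.begin(), delimiters.end());
        if (delimiters.front() != 0) {
            ranges.emplace_back(0, delimiters.front() - 1);
        }
        for (size_t i = 1; i < delimiters.size(); ++i) {
            if (delimiters[i] - delimiters[i - 1] > 1) {
                // Gap between consecutive delimiters becomes a kept range.
                ranges.emplace_back(delimiters[i - 1] + 1, delimiters[i] - 1);
            }
        }
        if (delimiters.back() != cUnicodeMax) {
            ranges.emplace_back(delimiters.back() + 1, cUnicodeMax);
        }
        return ranges;
    }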
- - // Intermediate node - - template <typename NFAStateType> - class RegexASTOr : public RegexAST<NFAStateType> { - public: - // Constructor - RegexASTOr (std::unique_ptr<RegexAST<NFAStateType>>, std::unique_ptr<RegexAST<NFAStateType>>); - - // Constructor - RegexASTOr (const RegexASTOr& rhs) { - m_left = std::unique_ptr<RegexAST<NFAStateType>>(rhs.m_left->clone()); - m_right = std::unique_ptr<RegexAST<NFAStateType>>(rhs.m_right->clone()); - } - - /** - * Used for cloning a unique_ptr of type RegexASTOr - * @return RegexASTOr* - */ - [[nodiscard]] RegexASTOr<NFAStateType>* clone () const override { - return new RegexASTOr<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTOr at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - m_left->set_possible_inputs_to_true(is_possible_input); - m_right->set_possible_inputs_to_true(is_possible_input); - } - - /** - * Transforms '.' to be any non-delimiter in a lexer rule if RegexASTGroup with `.` is a descendant of this RegexASTOr node - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - m_left->remove_delimiters_from_wildcard(delimiters); - m_right->remove_delimiters_from_wildcard(delimiters); - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTOr before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - private: - std::unique_ptr<RegexAST<NFAStateType>> m_left; - std::unique_ptr<RegexAST<NFAStateType>> m_right; - }; - - // Intermediate node - template <typename NFAStateType> - class RegexASTCat : public RegexAST<NFAStateType> { - public: - // Constructor - RegexASTCat (std::unique_ptr<RegexAST<NFAStateType>>, std::unique_ptr<RegexAST<NFAStateType>>); - - // Constructor - RegexASTCat (const RegexASTCat& rhs) { - m_left = std::unique_ptr<RegexAST<NFAStateType>>(rhs.m_left->clone()); - m_right = std::unique_ptr<RegexAST<NFAStateType>>(rhs.m_right->clone()); - } - - /** - * Used for cloning a unique_ptr of type RegexASTCat - * @return RegexASTCat* - */ - [[nodiscard]] RegexASTCat<NFAStateType>* clone () const override { - return new RegexASTCat<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTCat at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - m_left->set_possible_inputs_to_true(is_possible_input); - m_right->set_possible_inputs_to_true(is_possible_input); - } - - /** - * Transforms '.' to be any non-delimiter in a lexer rule if RegexASTGroup with `.` is a descendant of this RegexASTCat node - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - m_left->remove_delimiters_from_wildcard(delimiters); - m_right->remove_delimiters_from_wildcard(delimiters); - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTCat before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - private: - std::unique_ptr<RegexAST<NFAStateType>> m_left; - std::unique_ptr<RegexAST<NFAStateType>> m_right; - }; - - // Intermediate node - template <typename NFAStateType> - class RegexASTMultiplication : public RegexAST<NFAStateType> { - public: - // Constructor - RegexASTMultiplication (std::unique_ptr<RegexAST<NFAStateType>>, uint32_t, uint32_t); - - // Constructor - RegexASTMultiplication (const RegexASTMultiplication& rhs) { - m_operand = std::unique_ptr<RegexAST<NFAStateType>>(rhs.m_operand->clone()); - m_min = rhs.m_min; - m_max = rhs.m_max; - } - - /** - * Used for cloning a unique_ptr of type RegexASTMultiplication - * @return RegexASTMultiplication* - */ - [[nodiscard]] RegexASTMultiplication<NFAStateType>* clone () const override { - return new RegexASTMultiplication<NFAStateType>(*this); - } - - /** - * Sets is_possible_input to specify which utf8 characters are allowed in a lexer rule containing RegexASTMultiplication at a leaf node in its AST - * @param is_possible_input - */ - void set_possible_inputs_to_true (bool is_possible_input[]) const override { - m_operand->set_possible_inputs_to_true(is_possible_input); - } - - /** - * Transforms '.' to
be any non-delimiter in a lexer rule if RegexASTGroup with `.` is a descendant of this RegexASTMultiplication node - * @param delimiters - */ - void remove_delimiters_from_wildcard (std::vector<uint32_t>& delimiters) override { - m_operand->remove_delimiters_from_wildcard(delimiters); - } - - /** - * Add the needed RegexNFA::states to the passed in nfa to handle a RegexASTMultiplication before transitioning to a pre-tagged end_state - * @param nfa - * @param end_state - */ - void add (RegexNFA<NFAStateType>* nfa, NFAStateType* end_state) override; - - [[nodiscard]] bool is_infinite () const { - return this->m_max == 0; - } - - private: - std::unique_ptr<RegexAST<NFAStateType>> m_operand; - uint32_t m_min; - uint32_t m_max; - }; -} - -#include "RegexAST.inc" - -#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_HPP diff --git a/components/core/src/compressor_frontend/finite_automata/RegexAST.inc b/components/core/src/compressor_frontend/finite_automata/RegexAST.inc deleted file mode 100644 index 650d305f5..000000000 --- a/components/core/src/compressor_frontend/finite_automata/RegexAST.inc +++ /dev/null @@ -1,262 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_TPP -#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_TPP - -#include "RegexAST.hpp" - -// C++ standard libraries -#include -#include -#include -#include - -// Project headers -#include "../../spdlog_with_specializations.hpp" -#include "../Constants.hpp" -#include "RegexNFA.hpp" -#include "UnicodeIntervalTree.hpp" - -/* In order to use std::unordered_map (or absl::flat_hash_map) we need to have - * a specialization for hash from boost, abseil, etc. As far as we know, replacing - * std::set (i.e. an ordered set) with an unordered set is difficult due to - * fundamental issues of making an unordered data structure hashable. - * (i.e. 
you need two containers with the same elements in differing orders to - * hash to the same value, which makes computing/maintaining the hash of this - * unordered container non-trivial) - */ - -/// TODO: remove general `using` expressions like these from tpp -using std::map; -using std::max; -using std::min; -using std::pair; -using std::runtime_error; -using std::stack; -using std::unique_ptr; -using std::vector; - -namespace compressor_frontend::finite_automata { - - template - RegexASTLiteral::RegexASTLiteral (uint32_t character) : m_character(character) { - - } - - template - void RegexASTLiteral::add (RegexNFA* nfa, NFAStateType* end_state) { - nfa->add_root_interval(Interval(m_character, m_character), end_state); - } - - template - RegexASTInteger::RegexASTInteger (uint32_t digit) { - digit = digit - '0'; - m_digits.push_back(digit); - } - - template - RegexASTInteger::RegexASTInteger (RegexASTInteger* left, uint32_t digit) { - digit = digit - '0'; - m_digits = std::move(left->m_digits); - m_digits.push_back(digit); - } - - template - void RegexASTInteger::add (RegexNFA* nfa, NFAStateType* end_state) { - assert(false); // this shouldn't ever be called - } - - template - RegexASTOr::RegexASTOr (unique_ptr> left, unique_ptr> right) : m_left(std::move(left)), - m_right(std::move(right)) { - - } - - template - void RegexASTOr::add (RegexNFA* nfa, NFAStateType* end_state) { - m_left->add(nfa, end_state); - m_right->add(nfa, end_state); - } - - template - RegexASTCat::RegexASTCat (unique_ptr> left, unique_ptr> right) : m_left(std::move(left)), - m_right(std::move(right)) { - - } - - template - void RegexASTCat::add (RegexNFA* nfa, NFAStateType* end_state) { - NFAStateType* saved_root = nfa->m_root; - NFAStateType* intermediate_state = nfa->new_state(); - m_left->add(nfa, intermediate_state); - nfa->m_root = intermediate_state; - m_right->add(nfa, end_state); - nfa->m_root = saved_root; - } - - template - RegexASTMultiplication::RegexASTMultiplication (unique_ptr> operand, uint32_t min, uint32_t max) : - m_operand(std::move(operand)), m_min(min), m_max(max) { - - } - - template - void RegexASTMultiplication::add (RegexNFA* nfa, NFAStateType* end_state) { - NFAStateType* saved_root = nfa->m_root; - if (this->m_min == 0) { - nfa->m_root->add_epsilon_transition(end_state); - } else { - for (int i = 1; i < this->m_min; i++) { - NFAStateType* intermediate_state = nfa->new_state(); - m_operand->add(nfa, intermediate_state); - nfa->m_root = intermediate_state; - } - m_operand->add(nfa, end_state); - } - if (this->is_infinite()) { - nfa->m_root = end_state; - m_operand->add(nfa, end_state); - } else if (this->m_max > this->m_min) { - if (this->m_min != 0) { - NFAStateType* intermediate_state = nfa->new_state(); - m_operand->add(nfa, intermediate_state); - nfa->m_root = intermediate_state; - } - for (uint32_t i = this->m_min + 1; i < this->m_max; i++) { - m_operand->add(nfa, end_state); - NFAStateType* intermediate_state = nfa->new_state(); - m_operand->add(nfa, intermediate_state); - nfa->m_root = intermediate_state; - } - m_operand->add(nfa, end_state); - } - nfa->m_root = saved_root; - } - - template - RegexASTGroup::RegexASTGroup () { - m_is_wildcard = false; - m_negate = true; - } - - template - RegexASTGroup::RegexASTGroup (RegexASTGroup* left, RegexASTLiteral* right) { - m_is_wildcard = false; - if (right == nullptr) { - SPDLOG_ERROR("A bracket expression in the schema contains illegal characters, remember to escape special characters. 
" - "Refer to README-Schema.md for more details."); - throw runtime_error("RegexASTGroup1: right==nullptr"); - } - m_negate = left->m_negate; - m_ranges = left->m_ranges; - m_ranges.emplace_back(right->get_character(), right->get_character()); - } - - template - RegexASTGroup::RegexASTGroup (RegexASTGroup* left, RegexASTGroup* right) { - m_is_wildcard = false; - m_negate = left->m_negate; - m_ranges = left->m_ranges; - assert(right->m_ranges.size() == 1); // Only add LiteralRange - m_ranges.push_back(right->m_ranges[0]); - } - - template - RegexASTGroup::RegexASTGroup (RegexASTLiteral* right) { - m_is_wildcard = false; - if (right == nullptr) { - SPDLOG_ERROR("A bracket expression in the schema contains illegal characters, remember to escape special characters. " - "Refer to README-Schema.md for more details."); - throw runtime_error("RegexASTGroup2: right==nullptr"); - } - m_negate = false; - m_ranges.emplace_back(right->get_character(), right->get_character()); - } - - template - RegexASTGroup::RegexASTGroup (RegexASTGroup* right) { - m_is_wildcard = false; - m_negate = false; - assert(right->m_ranges.size() == 1); // Only add LiteralRange - m_ranges.push_back(right->m_ranges[0]); - } - - template - RegexASTGroup::RegexASTGroup (RegexASTLiteral* left, RegexASTLiteral* right) { - m_is_wildcard = false; - if (left == nullptr || right == nullptr) { - SPDLOG_ERROR("A bracket expression in the schema contains illegal characters, remember to escape special characters. " - "Refer to README-Schema.md for more details."); - throw runtime_error("RegexASTGroup3: left == nullptr || right == nullptr"); - } - m_negate = false; - assert(right->get_character() > left->get_character()); - m_ranges.emplace_back(left->get_character(), right->get_character()); - } - - template - RegexASTGroup::RegexASTGroup (const vector& literals) { - m_is_wildcard = false; - m_negate = false; - for (uint32_t literal: literals) { - m_ranges.emplace_back(literal, literal); - } - } - - template - RegexASTGroup::RegexASTGroup (uint32_t min, uint32_t max) { - m_is_wildcard = false; - m_negate = false; - m_ranges.emplace_back(min, max); - } - - // ranges must be sorted - template - vector::Range> RegexASTGroup::merge (const vector& ranges) { - vector merged; - if (ranges.empty()) { - return merged; - } - Range cur = ranges[0]; - for (size_t i = 1; i < ranges.size(); i++) { - Range r = ranges[i]; - if (r.first <= cur.second + 1) { - cur.second = max(r.second, cur.second); - } else { - merged.push_back(cur); - cur = r; - } - } - merged.push_back(cur); - return merged; - } - - // ranges must be sorted and non-overlapping - template - vector::Range> RegexASTGroup::complement (const vector& ranges) { - vector complemented; - uint32_t low = 0; - for (const Range& r: ranges) { - if (r.first > 0) { - complemented.emplace_back(low, r.first - 1); - } - low = r.second + 1; - } - if (low > 0) { - complemented.emplace_back(low, cUnicodeMax); - } - return complemented; - } - - template - void RegexASTGroup::add (RegexNFA* nfa, NFAStateType* end_state) { - std::sort(this->m_ranges.begin(), this->m_ranges.end()); - vector merged = RegexASTGroup::merge(this->m_ranges); - if (this->m_negate) { - merged = RegexASTGroup::complement(merged); - } - for (const Range& r: merged) { - nfa->m_root->add_interval(Interval(r.first, r.second), end_state); - } - } -} - -#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_AST_TPP \ No newline at end of file diff --git a/components/core/src/compressor_frontend/finite_automata/RegexDFA.hpp 
b/components/core/src/compressor_frontend/finite_automata/RegexDFA.hpp deleted file mode 100644 index f532c93c5..000000000 --- a/components/core/src/compressor_frontend/finite_automata/RegexDFA.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_HPP -#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_HPP - -// C++ standard libraries -#include -#include -#include -#include -#include -#include - -// Project headers -#include "../Constants.hpp" -#include "RegexNFA.hpp" - -namespace compressor_frontend::finite_automata { - enum class RegexDFAStateType { - Byte, - UTF8 - }; - - template - class RegexDFAState { - public: - using Tree = UnicodeIntervalTree*>; - - void add_tag (const int& rule_name_id) { - m_tags.push_back(rule_name_id); - } - - [[nodiscard]] const std::vector& get_tags () const { - return m_tags; - } - - bool is_accepting () { - return !m_tags.empty(); - } - - void add_byte_transition (const uint8_t& byte, RegexDFAState* dest_state) { - m_bytes_transition[byte] = dest_state; - } - - /** - * Returns the next state the DFA transitions to on input character (byte or utf8) - * @param character - * @return RegexDFAState* - */ - RegexDFAState* next (uint32_t character); - - - private: - std::vector m_tags; - RegexDFAState* m_bytes_transition[cSizeOfByte]; - - // NOTE: We don't need m_tree_transitions for the `stateType == RegexDFAStateType::Byte` case, - // so we use an empty class (`std::tuple<>`) in that case. - std::conditional_t> m_tree_transitions; - }; - - using RegexDFAByteState = RegexDFAState; - using RegexDFAUTF8State = RegexDFAState; - - template - class RegexDFA { - public: - - /** - * Creates a new DFA state based on a set of NFA states and adds it to m_states - * @param set - * @return DFAStateType* - */ - template - DFAStateType* new_state (const std::set& set); - - DFAStateType* get_root () { - return m_states.at(0).get(); - } - - private: - std::vector> m_states; - }; -} - -#include "RegexDFA.inc" - -#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_HPP diff --git a/components/core/src/compressor_frontend/finite_automata/RegexDFA.inc b/components/core/src/compressor_frontend/finite_automata/RegexDFA.inc deleted file mode 100644 index 75a5774bb..000000000 --- a/components/core/src/compressor_frontend/finite_automata/RegexDFA.inc +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_TPP -#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_TPP - -#include "RegexDFA.hpp" - -namespace compressor_frontend::finite_automata { - - template - RegexDFAState* RegexDFAState::next (uint32_t character) { - if constexpr (RegexDFAStateType::Byte == stateType) { - return m_bytes_transition[character]; - } else { - if (character < cSizeOfByte) { - return m_bytes_transition[character]; - } - unique_ptr> result = m_tree_transitions.find(Interval(character, character)); - assert(result->size() <= 1); - if (!result->empty()) { - return result->front().m_value; - } - return nullptr; - } - } - - template - template - DFAStateType* RegexDFA::new_state (const std::set& set) { - std::unique_ptr ptr = std::make_unique(); - m_states.push_back(std::move(ptr)); - - DFAStateType* state = m_states.back().get(); - for (const NFAStateType* s: set) { - if (s->is_accepting()) { - state->add_tag(s->get_tag()); - } - } - return state; - } -} - -#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_DFA_TPP \ No newline at end of file diff --git a/components/core/src/compressor_frontend/finite_automata/RegexNFA.hpp 
b/components/core/src/compressor_frontend/finite_automata/RegexNFA.hpp deleted file mode 100644 index 415740fcd..000000000 --- a/components/core/src/compressor_frontend/finite_automata/RegexNFA.hpp +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_HPP -#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_HPP - -// C++ standard libraries -#include -#include -#include -#include -#include -#include -#include - -// Project headers -#include "../Constants.hpp" -#include "UnicodeIntervalTree.hpp" - -namespace compressor_frontend::finite_automata { - enum class RegexNFAStateType { - Byte, - UTF8 - }; - - template - class RegexNFAState { - public: - - using Tree = UnicodeIntervalTree*>; - - void set_accepting (bool accepting) { - m_accepting = accepting; - } - - [[nodiscard]] const bool& is_accepting () const { - return m_accepting; - } - - void set_tag (int rule_name_id) { - m_tag = rule_name_id; - } - - [[nodiscard]] const int& get_tag () const { - return m_tag; - } - - void set_epsilon_transitions (std::vector*>& epsilon_transitions) { - m_epsilon_transitions = epsilon_transitions; - } - - void add_epsilon_transition (RegexNFAState* epsilon_transition) { - m_epsilon_transitions.push_back(epsilon_transition); - } - - void clear_epsilon_transitions () { - m_epsilon_transitions.clear(); - } - - [[nodiscard]] const std::vector*>& get_epsilon_transitions () const { - return m_epsilon_transitions; - } - - void set_byte_transitions (uint8_t byte, std::vector*>& byte_transitions) { - m_bytes_transitions[byte] = byte_transitions; - } - - void add_byte_transition (uint8_t byte, RegexNFAState* dest_state) { - m_bytes_transitions[byte].push_back(dest_state); - } - - void clear_byte_transitions (uint8_t byte) { - m_bytes_transitions[byte].clear(); - } - - [[nodiscard]] const std::vector*>& get_byte_transitions (uint8_t byte) const { - return m_bytes_transitions[byte]; - } - - void reset_tree_transitions () { - m_tree_transitions.reset(); - } - - const Tree& get_tree_transitions () { - return m_tree_transitions; - } - - /** - Add dest_state to m_bytes_transitions if all values in interval are a byte, otherwise add dest_state to m_tree_transitions - * @param interval - * @param dest_state - */ - void add_interval (Interval interval, RegexNFAState* dest_state); - - private: - bool m_accepting; - int m_tag; - std::vector*> m_epsilon_transitions; - std::vector*> m_bytes_transitions[cSizeOfByte]; - - // NOTE: We don't need m_tree_transitions for the `stateType == RegexDFAStateType::Byte` case, - // so we use an empty class (`std::tuple<>`) in that case. 
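The NOTE above describes a conditional-member trick; isolated from the class, it looks like the following sketch (simplified, assumed names, not the real classes):

    #include <tuple>
    #include <type_traits>

    enum class StateType { Byte, UTF8 };

    struct IntervalTree {};  // stand-in for UnicodeIntervalTree

    // For byte-only states the member collapses to an empty std::tuple<>, so
    // it carries no data and is simply never touched; UTF8 states get the tree.
    template <StateType state_type>
    struct State {
        std::conditional_t<StateType::Byte == state_type, std::tuple<>, IntervalTree>
                m_tree_transitions;
    };

    static_assert(std::is_empty_v<std::tuple<>>);  // the placeholder holds nothing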
-        std::conditional_t<stateType == RegexNFAStateType::Byte, std::tuple<>, Tree> m_tree_transitions;
-
-
-    };
-
-    using RegexNFAByteState = RegexNFAState<RegexNFAStateType::Byte>;
-    using RegexNFAUTF8State = RegexNFAState<RegexNFAStateType::UTF8>;
-
-    template <typename NFAStateType>
-    class RegexNFA {
-    public:
-        typedef std::vector<NFAStateType*> StateVec;
-
-        // constructor
-        RegexNFA ();
-
-        /**
-         * Create a unique_ptr for an NFA state and add it to m_states
-         * @return NFAStateType*
-         */
-        NFAStateType* new_state ();
-
-        /**
-         * Reverse the NFA such that it matches on its reverse language
-         */
-        void reverse ();
-
-        void add_root_interval (Interval interval, NFAStateType* dest_state) {
-            m_root->add_interval(interval, dest_state);
-        }
-
-        NFAStateType* m_root;
-
-    private:
-        std::vector<std::unique_ptr<NFAStateType>> m_states;
-    };
-}
-
-#include "RegexNFA.inc"
-
-#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_HPP
diff --git a/components/core/src/compressor_frontend/finite_automata/RegexNFA.inc b/components/core/src/compressor_frontend/finite_automata/RegexNFA.inc
deleted file mode 100644
index 287ef75bf..000000000
--- a/components/core/src/compressor_frontend/finite_automata/RegexNFA.inc
+++ /dev/null
@@ -1,188 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_TPP
-#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_TPP
-
-#include "RegexNFA.hpp"
-
-// C++ standard libraries
-#include
-#include
-#include
-#include
-
-// Project headers
-#include "../Constants.hpp"
-#include "UnicodeIntervalTree.hpp"
-
-using std::map;
-using std::max;
-using std::min;
-using std::pair;
-using std::stack;
-using std::unique_ptr;
-using std::vector;
-
-namespace compressor_frontend::finite_automata {
-    template <RegexNFAStateType stateType>
-    void RegexNFAState<stateType>::add_interval (Interval interval, RegexNFAState<stateType>* dest_state) {
-        if (interval.first < cSizeOfByte) {
-            uint32_t bound = min(interval.second, cSizeOfByte - 1);
-            for (uint32_t i = interval.first; i <= bound; i++) {
-                add_byte_transition(i, dest_state);
-            }
-            interval.first = bound + 1;
-        }
-        if constexpr (RegexNFAStateType::UTF8 == stateType) {
-            if (interval.second < cSizeOfByte) {
-                return;
-            }
-            unique_ptr<vector<typename Tree::Data>> overlaps = m_tree_transitions.pop(interval);
-            for (const typename Tree::Data& data: *overlaps) {
-                uint32_t overlap_low = max(data.m_interval.first, interval.first);
-                uint32_t overlap_high = min(data.m_interval.second, interval.second);
-
-                std::vector<RegexNFAState<stateType>*> tree_states = data.m_value;
-                tree_states.push_back(dest_state);
-                m_tree_transitions.insert(Interval(overlap_low, overlap_high), tree_states);
-                if (data.m_interval.first < interval.first) {
-                    m_tree_transitions.insert(Interval(data.m_interval.first, interval.first - 1), data.m_value);
-                } else if (data.m_interval.first > interval.first) {
-                    m_tree_transitions.insert(Interval(interval.first, data.m_interval.first - 1), {dest_state});
-                }
-                if (data.m_interval.second > interval.second) {
-                    m_tree_transitions.insert(Interval(interval.second + 1, data.m_interval.second), data.m_value);
-                }
-                interval.first = data.m_interval.second + 1;
-            }
-            if (interval.first != 0 && interval.first <= interval.second) {
-                m_tree_transitions.insert(interval, {dest_state});
-            }
-        }
-    }
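Note: add_interval() above splits each inclusive code-point interval at the byte boundary: values below cSizeOfByte go into the 256-entry byte-transition table, the remainder into the interval tree. A standalone sketch of the split, using a hypothetical helper (not code from this patch):

    #include <algorithm>
    #include <cstdint>
    #include <utility>

    constexpr uint32_t cSizeOfByte = 256;  // mirrors the constant from Constants.hpp
    using Interval = std::pair<uint32_t, uint32_t>;  // inclusive [first, second]

    // Returns the byte-table part and the tree part of the given interval; a
    // part is empty when its first bound exceeds its second.
    std::pair<Interval, Interval> split_at_byte_boundary(Interval interval) {
        Interval byte_part{interval.first, std::min(interval.second, cSizeOfByte - 1)};
        Interval tree_part{std::max(interval.first, cSizeOfByte), interval.second};
        return {byte_part, tree_part};
    }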
-
-    template <typename NFAStateType>
-    void RegexNFA<NFAStateType>::reverse () {
-        // add new end with all accepting pointing to it
-        NFAStateType* new_end = new_state();
-        for (unique_ptr<NFAStateType>& state_ptr: m_states) {
-            if (state_ptr->is_accepting()) {
-                state_ptr->add_epsilon_transition(new_end);
-                state_ptr->set_accepting(false);
-            }
-        }
-        // move edges from NFA to maps
-        map<pair<NFAStateType*, NFAStateType*>, vector<uint8_t>> byte_edges;
-        map<pair<NFAStateType*, NFAStateType*>, bool> epsilon_edges;
-        for (unique_ptr<NFAStateType>& src_state_ptr: m_states) {
-            // TODO: handle utf8 case with if constexpr (RegexNFAUTF8State == NFAStateType) ~ don't really need this though
-            for (uint32_t byte = 0; byte < cSizeOfByte; byte++) {
-                for (NFAStateType* dest_state_ptr: src_state_ptr->get_byte_transitions(byte)) {
-                    byte_edges[pair<NFAStateType*, NFAStateType*>(src_state_ptr.get(), dest_state_ptr)].push_back(byte);
-                }
-                src_state_ptr->clear_byte_transitions(byte);
-            }
-            for (NFAStateType* dest_state_ptr: src_state_ptr->get_epsilon_transitions()) {
-                epsilon_edges[pair<NFAStateType*, NFAStateType*>(src_state_ptr.get(), dest_state_ptr)] = true;
-            }
-            src_state_ptr->clear_epsilon_transitions();
-        }
-
-        // insert edges from maps back into NFA, but in the reverse direction
-        for (unique_ptr<NFAStateType>& src_state_ptr: m_states) {
-            for (unique_ptr<NFAStateType>& dest_state_ptr: m_states) {
-                pair<NFAStateType*, NFAStateType*> key(src_state_ptr.get(), dest_state_ptr.get());
-                auto byte_it = byte_edges.find(key);
-                if (byte_it != byte_edges.end()) {
-                    for (uint8_t byte: byte_it->second) {
-                        dest_state_ptr->add_byte_transition(byte, src_state_ptr.get());
-                    }
-                }
-                auto epsilon_it = epsilon_edges.find(key);
-                if (epsilon_it != epsilon_edges.end()) {
-                    dest_state_ptr->add_epsilon_transition(src_state_ptr.get());
-                }
-            }
-        }
-
-        // propagate tag from old accepting m_states
-        for (NFAStateType* old_accepting_state: new_end->get_epsilon_transitions()) {
-            int tag = old_accepting_state->get_tag();
-            stack<NFAStateType*> unvisited_states;
-            std::set<NFAStateType*> visited_states;
-            unvisited_states.push(old_accepting_state);
-            while (!unvisited_states.empty()) {
-                NFAStateType* current_state = unvisited_states.top();
-                current_state->set_tag(tag);
-                unvisited_states.pop();
-                visited_states.insert(current_state);
-                for (uint32_t byte = 0; byte < cSizeOfByte; byte++) {
-                    std::vector<NFAStateType*> byte_transitions = current_state->get_byte_transitions(byte);
-                    for (NFAStateType* next_state: byte_transitions) {
-                        if (visited_states.find(next_state) == visited_states.end()) {
-                            unvisited_states.push(next_state);
-                        }
-                    }
-                }
-                for (NFAStateType* next_state: current_state->get_epsilon_transitions()) {
-                    if (visited_states.find(next_state) == visited_states.end()) {
-                        unvisited_states.push(next_state);
-                    }
-                }
-            }
-        }
-        for (int32_t i = m_states.size() - 1; i >= 0; i--) {
-            unique_ptr<NFAStateType>& src_state_unique_ptr = m_states[i];
-            NFAStateType* src_state = src_state_unique_ptr.get();
-            int tag = src_state->get_tag();
-            for (uint32_t byte = 0; byte < cSizeOfByte; byte++) {
-                std::vector<NFAStateType*> byte_transitions = src_state->get_byte_transitions(byte);
-                for (int32_t j = byte_transitions.size() - 1; j >= 0; j--) {
-                    NFAStateType*& dest_state = byte_transitions[j];
-                    if (dest_state == m_root) {
-                        dest_state = new_state();
-                        assert(dest_state != nullptr);
-                        dest_state->set_tag(tag);
-                        dest_state->set_accepting(true);
-                    }
-                }
-                src_state->clear_byte_transitions(byte);
-                src_state->set_byte_transitions(byte, byte_transitions);
-            }
-            std::vector<NFAStateType*> epsilon_transitions = src_state->get_epsilon_transitions();
-            for (int32_t j = epsilon_transitions.size() - 1; j >= 0; j--) {
-                NFAStateType*& dest_state = epsilon_transitions[j];
-                if (dest_state == m_root) {
-                    dest_state = new_state();
-                    dest_state->set_tag(src_state->get_tag());
-                    dest_state->set_accepting(true);
-                }
-            }
-            src_state->clear_epsilon_transitions();
-            src_state->set_epsilon_transitions(epsilon_transitions);
-        }
-
-        for (uint32_t i = 0; i < m_states.size(); i++) {
-            if (m_states[i].get() == m_root) {
-                m_states.erase(m_states.begin() + i);
-                break;
-            }
-        }
-        // start from the end
-        m_root = new_end;
-
-    }
-
-    template <typename NFAStateType>
-    RegexNFA<NFAStateType>::RegexNFA () {
-        m_root = new_state();
-    }
-
-    template <typename NFAStateType>
-    NFAStateType* RegexNFA<NFAStateType>::new_state () {
-        unique_ptr<NFAStateType> ptr = std::make_unique<NFAStateType>();
-        NFAStateType* state = ptr.get();
-        m_states.push_back(std::move(ptr));
-        return state;
-    }
-}
-
-#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_REGEX_NFA_TPP
\ No newline at end of file
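Note: RegexNFA::reverse() above is, at its core, edge reversal, plus swapping the roles of the start and accepting states and re-propagating tags. The edge-flipping step on a plain adjacency-list digraph, as a self-contained illustration (not code from this patch):

    #include <cstddef>
    #include <vector>

    // Reverse every edge of a digraph: edge u -> v becomes v -> u. reverse()
    // applies the same idea to both byte and epsilon transitions.
    std::vector<std::vector<std::size_t>> reverse_edges(std::vector<std::vector<std::size_t>> const& adj) {
        std::vector<std::vector<std::size_t>> reversed(adj.size());
        for (std::size_t u = 0; u < adj.size(); ++u) {
            for (std::size_t v : adj[u]) {
                reversed[v].push_back(u);
            }
        }
        return reversed;
    }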
diff --git a/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp b/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
deleted file mode 100644
index 016b564da..000000000
--- a/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.hpp
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_HPP
-#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_HPP
-
-#include
-#include
-#include
-#include
-#include
-
-// Project headers
-#include "../Constants.hpp"
-
-namespace compressor_frontend::finite_automata {
-
-    template <typename T>
-    class UnicodeIntervalTree {
-    public:
-        /// TODO: probably use this Data type more often in this class???
-        /**
-         * Structure to represent utf8 data
-         */
-        struct Data {
-        public:
-            Data (Interval interval, T value) : m_interval(std::move(interval)), m_value(value) {}
-
-            Interval m_interval;
-            T m_value;
-        };
-
-        /**
-         * Insert data into the tree
-         * @param interval
-         * @param value
-         */
-        void insert (Interval interval, T value);
-
-        /**
-         * Returns all utf8 in the tree
-         * @return std::vector<Data>
-         */
-        std::vector<Data> all () const;
-
-        /**
-         * Return an interval in the tree
-         * @param interval
-         * @return std::unique_ptr<std::vector<Data>>
-         */
-        std::unique_ptr<std::vector<Data>> find (Interval interval);
-
-        /**
-         * Remove an interval from the tree
-         * @param interval
-         * @return std::unique_ptr<std::vector<Data>>
-         */
-        std::unique_ptr<std::vector<Data>> pop (Interval interval);
-
-        void reset () {
-            m_root.reset();
-        }
-
-    private:
-        class Node {
-        public:
-            // Constructor
-            Node () : m_lower(0), m_upper(0), m_height(0) {}
-
-            // Constructor
-            Node (Interval i, T v) : m_interval(std::move(i)), m_value(v) {}
-
-            /**
-             * Balance the subtree below a node
-             * @param node
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> balance (std::unique_ptr<Node> node);
-
-            /**
-             * Insert a node
-             * @param node
-             * @param interval
-             * @param value
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> insert (std::unique_ptr<Node> node, Interval interval, T value);
-
-            /**
-             * Remove a node
-             * @param node
-             * @param interval
-             * @param ret
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> pop (std::unique_ptr<Node> node, Interval interval, std::unique_ptr<Node>* ret);
-
-            /**
-             * Remove a node
-             * @param node
-             * @param ret
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> pop_min (std::unique_ptr<Node> node, std::unique_ptr<Node>* ret);
-
-            /**
-             * Rotate a node by a factor
-             * @param node
-             * @param factor
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> rotate (std::unique_ptr<Node> node, int factor);
-
-            /**
-             * Rotate a node clockwise
-             * @param node
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> rotate_cw (std::unique_ptr<Node> node);
-
-            /**
-             * Rotate a node counterclockwise
-             * @param node
-             * @return std::unique_ptr<Node>
-             */
-            static std::unique_ptr<Node> rotate_ccw (std::unique_ptr<Node> node);
-
-            /**
-             * add all utf8 in subtree to results
-             * @param results
-             */
-            void all (std::vector<Data>* results);
-
-            /**
-             * add all utf8 in subtree that matches interval to results
-             * @param interval
-             * @param results
-             */
-            void find (Interval interval, std::vector<Data>* results);
-
-            /**
-             * update node
-             */
-            void update ();
-
-            /**
-             * get balance factor of node
-             */
-            int balance_factor ();
-
-            /**
-             * overlaps_recursive()
-             * @param i
-             */
-            bool overlaps_recursive (Interval i);
-
-            /**
-             * overlaps()
-             * @param i
-             */
-            bool overlaps (Interval i);
-
-            Interval get_interval () {
-                return m_interval;
-            }
-
-            T get_value () {
-                return m_value;
-            }
-
-        private:
-
-            Interval m_interval;
-            T m_value;
-            uint32_t m_lower{};
-            uint32_t m_upper{};
-            int m_height{};
-            std::unique_ptr<Node> m_left;
-            std::unique_ptr<Node> m_right;
-        };
-
-        std::unique_ptr<Node> m_root;
-    };
-}
-
-// Implementation of template class must be included in anything wanting to use it
-#include "UnicodeIntervalTree.inc"
-
-#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_HPP
diff --git a/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc b/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
deleted file mode 100644
index 2bde708b7..000000000
--- a/components/core/src/compressor_frontend/finite_automata/UnicodeIntervalTree.inc
+++ /dev/null
@@ -1,231 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_TPP
-#define COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_TPP
-
-#include "UnicodeIntervalTree.hpp"
-
-// C++ standard libraries
-#include
-
-using std::max;
-using std::unique_ptr;
-using std::vector;
-
-namespace compressor_frontend::finite_automata {
-
-    template <typename T>
-    void UnicodeIntervalTree<T>::insert (Interval interval, T value) {
-        m_root = Node::insert(std::move(m_root), interval, value);
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::insert (unique_ptr<Node> node, Interval interval, T value) {
-        if (node == nullptr) {
-            unique_ptr<Node> n(new Node(interval, value));
-            n->update();
-            return n;
-        }
-        if (interval < node->m_interval) {
-            node->m_left = Node::insert(std::move(node->m_left), interval, value);
-        } else if (interval > node->m_interval) {
-            node->m_right = Node::insert(std::move(node->m_right), interval, value);
-        } else {
-            node->m_value = value;
-        }
-        node->update();
-        return Node::balance(std::move(node));
-    }
-
-    template <typename T>
-    vector<typename UnicodeIntervalTree<T>::Data> UnicodeIntervalTree<T>::all () const {
-        vector<Data> results;
-        if (m_root != nullptr) {
-            m_root->all(&results);
-        }
-        return results;
-    }
-
-    template <typename T>
-    void UnicodeIntervalTree<T>::Node::all (vector<Data>* results) {
-        if (m_left != nullptr) {
-            m_left->all(results);
-        }
-        results->push_back(Data(m_interval, m_value));
-        if (m_right != nullptr) {
-            m_right->all(results);
-        }
-    }
-
-    template <typename T>
-    unique_ptr<vector<typename UnicodeIntervalTree<T>::Data>> UnicodeIntervalTree<T>::find (Interval interval) {
-        unique_ptr<vector<Data>> results(new vector<Data>);
-        m_root->find(interval, results.get());
-        return results;
-    }
-
-    template <typename T>
-    void UnicodeIntervalTree<T>::Node::find (Interval interval, vector<Data>* results) {
-        if (!overlaps_recursive(interval)) {
-            return;
-        }
-        if (m_left != nullptr) {
-            m_left->find(interval, results);
-        }
-        if (overlaps(interval)) {
-            results->push_back(Data(m_interval, m_value));
-        }
-        if (m_right != nullptr) {
-            m_right->find(interval, results);
-        }
-    }
-
-    template <typename T>
-    unique_ptr<vector<typename UnicodeIntervalTree<T>::Data>> UnicodeIntervalTree<T>::pop (Interval interval) {
-        unique_ptr<vector<Data>> results(new vector<Data>);
-        while (true) {
-            unique_ptr<Node> n;
-            m_root = Node::pop(std::move(m_root), interval, &n);
-            if (n == nullptr) {
-                break;
-            }
-            results->push_back(Data(n->get_interval(), n->get_value()));
-        }
-        return results;
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::pop (unique_ptr<Node> node, Interval interval,
-                                                                                          unique_ptr<Node>* ret) {
-        if (node == nullptr) {
-            return nullptr;
-        }
-        if (!node->overlaps_recursive(interval)) {
-            return node;
-        }
-        node->m_left = Node::pop(std::move(node->m_left), interval, ret);
-        if (ret->get() != nullptr) {
-            node->update();
-            return Node::balance(std::move(node));
-        }
-        assert(node->overlaps(interval));
-        ret->reset(node.release());
-        if (((*ret)->m_left == nullptr) && ((*ret)->m_right == nullptr)) {
-            return nullptr;
-        } else if ((*ret)->m_left == nullptr) {
-            return std::move((*ret)->m_right);
-        } else if ((*ret)->m_right == nullptr) {
-            return std::move((*ret)->m_left);
-        } else {
-            unique_ptr<Node> replacement;
-            unique_ptr<Node> sub_tree = Node::pop_min(std::move((*ret)->m_right), &replacement);
-            replacement->m_left = std::move((*ret)->m_left);
-            replacement->m_right = std::move(sub_tree);
-            replacement->update();
-            return Node::balance(std::move(replacement));
-        }
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::pop_min (unique_ptr<Node> node, unique_ptr<Node>* ret) {
-        assert(node != nullptr);
-        if (node->m_left == nullptr) {
-            assert(node->m_right != nullptr);
-            unique_ptr<Node> right(std::move(node->m_right));
-            ret->reset(node.release());
-            return right;
-        }
-        node->m_left = Node::pop_min(std::move(node->m_left), ret);
-        node->update();
-        return Node::balance(std::move(node));
-    }
-
-    template <typename T>
-    void UnicodeIntervalTree<T>::Node::update () {
-        if ((m_left == nullptr) && (m_right == nullptr)) {
-            m_height = 1;
-            m_lower = m_interval.first;
-            m_upper = m_interval.second;
-        } else if (m_left == nullptr) {
-            m_height = 2;
-            m_lower = m_interval.first;
-            m_upper = max(m_interval.second, m_right->m_upper);
-        } else if (m_right == nullptr) {
-            m_height = 2;
-            m_lower = m_left->m_lower;
-            m_upper = max(m_interval.second, m_left->m_upper);
-        } else {
-            m_height = max(m_left->m_height, m_right->m_height) + 1;
-            m_lower = m_left->m_lower;
-            m_upper = max({m_interval.second, m_left->m_upper, m_right->m_upper});
-        }
-    }
-
-    template <typename T>
-    int UnicodeIntervalTree<T>::Node::balance_factor () {
-        return (m_right != nullptr ? m_right->m_height : 0) -
-               (m_left != nullptr ? m_left->m_height : 0);
-    }
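Note: Node::update() above maintains three augmentations per node: the AVL height plus the subtree's bounding range [m_lower, m_upper], which overlaps_recursive() uses to prune searches. An equivalent standalone recomputation, with hypothetical types (not code from this patch):

    #include <algorithm>
    #include <cstdint>

    struct Aug { uint32_t lower; uint32_t upper; int height; };

    // Recompute a node's augmentations from its own interval [lo, hi] and its
    // optional children, generalizing the four cases in update().
    Aug recompute(uint32_t lo, uint32_t hi, Aug const* left, Aug const* right) {
        Aug a{lo, hi, 1};
        if (left != nullptr) {
            a.lower = left->lower;  // the leftmost descendant holds the smallest bound
            a.upper = std::max(a.upper, left->upper);
            a.height = std::max(a.height, left->height + 1);
        }
        if (right != nullptr) {
            a.upper = std::max(a.upper, right->upper);
            a.height = std::max(a.height, right->height + 1);
        }
        return a;
    }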
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::balance (unique_ptr<Node> node) {
-        int factor = node->balance_factor();
-        if (factor * factor <= 1) {
-            return node;
-        }
-        int sub_factor = (factor < 0) ? node->m_left->balance_factor() : node->m_right->balance_factor();
-        if (factor * sub_factor > 0) {
-            return Node::rotate(std::move(node), factor);
-        }
-        if (factor == 2) {
-            node->m_right = Node::rotate(std::move(node->m_right), sub_factor);
-        } else {
-            node->m_left = Node::rotate(std::move(node->m_left), sub_factor);
-        }
-        return Node::rotate(std::move(node), factor);
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::rotate (unique_ptr<Node> node, int factor) {
-        if (factor < 0) {
-            return Node::rotate_cw(std::move(node));
-        } else if (factor > 0) {
-            return Node::rotate_ccw(std::move(node));
-        }
-        return node;
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::rotate_cw (unique_ptr<Node> node) {
-        unique_ptr<Node> n(std::move(node->m_left));
-        node->m_left.reset(n->m_right.release());
-        n->m_right.reset(node.release());
-        n->m_right->update();
-        n->update();
-        return n;
-    }
-
-    template <typename T>
-    unique_ptr<typename UnicodeIntervalTree<T>::Node> UnicodeIntervalTree<T>::Node::rotate_ccw (unique_ptr<Node> node) {
-        unique_ptr<Node> n(std::move(node->m_right));
-        node->m_right.reset(n->m_left.release());
-        n->m_left.reset(node.release());
-        n->m_left->update();
-        n->update();
-        return n;
-    }
-
-    template <typename T>
-    bool UnicodeIntervalTree<T>::Node::overlaps_recursive (Interval i) {
-        return ((m_lower <= i.first) && (i.first <= m_upper)) ||
-               ((m_lower <= i.second) && (i.second <= m_upper)) ||
-               ((i.first <= m_lower) && (m_lower <= i.second));
-    }
-
-    template <typename T>
-    bool UnicodeIntervalTree<T>::Node::overlaps (Interval i) {
-        return ((m_interval.first <= i.first) && (i.first <= m_interval.second)) ||
-               ((m_interval.first <= i.second) && (i.second <= m_interval.second)) ||
-               ((i.first <= m_interval.first) && (m_interval.first <= i.second));
-    }
-}
-
-#endif // COMPRESSOR_FRONTEND_FINITE_AUTOMATA_UNICODE_INTERVAL_TREE_TPP
\ No newline at end of file
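Note: the three disjuncts in Node::overlaps() above are equivalent to the usual compact test for closed intervals, shown here as a self-contained sketch (not code from this patch):

    #include <cstdint>
    #include <utility>

    using Interval = std::pair<uint32_t, uint32_t>;  // inclusive [first, second]

    // Two closed intervals overlap iff each starts no later than the other ends.
    bool overlaps(Interval const& a, Interval const& b) {
        return a.first <= b.second && b.first <= a.second;
    }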
diff --git a/components/core/src/compressor_frontend/utils.cpp b/components/core/src/compressor_frontend/utils.cpp
deleted file mode 100644
index 9efbeb133..000000000
--- a/components/core/src/compressor_frontend/utils.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-#include "utils.hpp"
-
-// C++ standard libraries
-#include
-
-// Project headers
-#include "../FileReader.hpp"
-#include "Constants.hpp"
-#include "LALR1Parser.hpp"
-#include "SchemaParser.hpp"
-
-using std::unique_ptr;
-
-namespace compressor_frontend {
-    void load_lexer_from_file (const std::string& schema_file_path, bool reverse, lexers::ByteLexer& lexer) {
-        FileReader schema_reader;
-        schema_reader.try_open(schema_file_path);
-
-        SchemaParser sp;
-        unique_ptr<SchemaFileAST> schema_ast = sp.generate_schema_ast(schema_reader);
-        auto* delimiters_ptr = dynamic_cast<DelimiterStringAST*>(schema_ast->m_delimiters.get());
-
-        if (!lexer.m_symbol_id.empty()) {
-            throw std::runtime_error("Error: symbol_ids initialized before setting enum symbol_ids");
-        }
-
-        /// TODO: this is a copy of other code
-        lexer.m_symbol_id[cTokenEnd] = (int) SymbolID::TokenEndID;
-        lexer.m_symbol_id[cTokenUncaughtString] = (int) SymbolID::TokenUncaughtStringID;
-        lexer.m_symbol_id[cTokenInt] = (int) SymbolID::TokenIntId;
-        lexer.m_symbol_id[cTokenFloat] = (int) SymbolID::TokenFloatId;
-        lexer.m_symbol_id[cTokenFirstTimestamp] = (int) SymbolID::TokenFirstTimestampId;
-        lexer.m_symbol_id[cTokenNewlineTimestamp] = (int) SymbolID::TokenNewlineTimestampId;
-        lexer.m_symbol_id[cTokenNewline] = (int) SymbolID::TokenNewlineId;
-
-        lexer.m_id_symbol[(int) SymbolID::TokenEndID] = cTokenEnd;
-        lexer.m_id_symbol[(int) SymbolID::TokenUncaughtStringID] = cTokenUncaughtString;
-        lexer.m_id_symbol[(int) SymbolID::TokenIntId] = cTokenInt;
-        lexer.m_id_symbol[(int) SymbolID::TokenFloatId] = cTokenFloat;
-        lexer.m_id_symbol[(int) SymbolID::TokenFirstTimestampId] = cTokenFirstTimestamp;
-        lexer.m_id_symbol[(int) SymbolID::TokenNewlineTimestampId] = cTokenNewlineTimestamp;
-        lexer.m_id_symbol[(int) SymbolID::TokenNewlineId] = cTokenNewline;
-
-        /// TODO: figure out why this needs to be specially added
-        lexer.add_rule(lexer.m_symbol_id["newLine"],
-                       std::move(make_unique<RegexASTLiteral<RegexNFAByteState>>(RegexASTLiteral<RegexNFAByteState>('\n'))));
-
-        if (delimiters_ptr != nullptr) {
-            lexer.add_delimiters(delimiters_ptr->m_delimiters);
-        }
-        for (unique_ptr<ParserAST> const& parser_ast: schema_ast->m_schema_vars) {
-            auto* rule = dynamic_cast<SchemaVarAST*>(parser_ast.get());
-
-            if ("timestamp" == rule->m_name) {
-                continue;
-            }
-
-            if (lexer.m_symbol_id.find(rule->m_name) == lexer.m_symbol_id.end()) {
-                lexer.m_symbol_id[rule->m_name] = lexer.m_symbol_id.size();
-                lexer.m_id_symbol[lexer.m_symbol_id[rule->m_name]] = rule->m_name;
-            }
-
-            // transform '.' from any-character into any non-delimiter character
-            rule->m_regex_ptr->remove_delimiters_from_wildcard(delimiters_ptr->m_delimiters);
-
-            /// TODO: this error function is a copy
-            // currently, error out if non-timestamp pattern contains a delimiter
-            // check if regex contains a delimiter
-            bool is_possible_input[cUnicodeMax] = {false};
-            rule->m_regex_ptr->set_possible_inputs_to_true(is_possible_input);
-            bool contains_delimiter = false;
-            uint32_t delimiter_name;
-            for (uint32_t delimiter: delimiters_ptr->m_delimiters) {
-                if (is_possible_input[delimiter]) {
-                    contains_delimiter = true;
-                    delimiter_name = delimiter;
-                    break;
-                }
-            }
-            if (contains_delimiter) {
-                FileReader schema_reader;
-                ErrorCode error_code = schema_reader.try_open(schema_ast->m_file_path);
-                if (ErrorCode_Success != error_code) {
-                    throw std::runtime_error(schema_file_path + ":" + to_string(rule->m_line_num + 1) + ": error: '" + rule->m_name
-                                             + "' has regex pattern which contains delimiter '" + char(delimiter_name) + "'.\n");
-                } else {
-                    // more detailed debugging based on looking at the file
-                    string line;
-                    for (uint32_t i = 0; i <= rule->m_line_num; i++) {
-                        schema_reader.read_to_delimiter('\n', false, false, line);
-                    }
-                    int colon_pos = 0;
-                    for (char i : line) {
-                        colon_pos++;
-                        if (i == ':') {
-                            break;
-                        }
-                    }
-                    string indent(10, ' ');
-                    string spaces(colon_pos, ' ');
-                    string arrows(line.size() - colon_pos, '^');
-
-                    throw std::runtime_error(schema_file_path + ":" + to_string(rule->m_line_num + 1) + ": error: '" + rule->m_name
-                                             + "' has regex pattern which contains delimiter '" + char(delimiter_name) + "'.\n"
-                                             + indent + line + "\n" + indent + spaces + arrows + "\n");
-
-                }
-            }
-
-            lexer.add_rule(lexer.m_symbol_id[rule->m_name], std::move(rule->m_regex_ptr));
-        }
-        if (reverse) {
-            lexer.generate_reverse();
-        } else {
-            lexer.generate();
-        }
-
-        schema_reader.close();
-    }
-}
diff --git a/components/core/src/compressor_frontend/utils.hpp b/components/core/src/compressor_frontend/utils.hpp
deleted file mode 100644
index 0943d3dda..000000000
--- a/components/core/src/compressor_frontend/utils.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef COMPRESSOR_FRONTEND_UTILS_HPP
-#define COMPRESSOR_FRONTEND_UTILS_HPP
-
-// Project headers
-#include "Lexer.hpp"
-
-namespace compressor_frontend {
-
-    using finite_automata::RegexNFAByteState;
-    using finite_automata::RegexDFAByteState;
-
-    /**
-     * Loads the lexer from the schema file at the given path
-     * @param schema_file_path
-     * @param reverse Whether to generate a reverse lexer
-     * @param lexer
-     */
-    void load_lexer_from_file (const std::string& schema_file_path, bool reverse, Lexer<RegexNFAByteState, RegexDFAByteState>& lexer);
-}
-
-#endif //COMPRESSOR_FRONTEND_UTILS_HPP
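Note: load_lexer_from_file() above keeps m_symbol_id (name -> id) and m_id_symbol (id -> name) mutually inverse, assigning each new rule the next free id, i.e. the current map size. A minimal standalone equivalent, with a hypothetical helper (not code from this patch):

    #include <cassert>
    #include <map>
    #include <string>

    void add_symbol(std::map<std::string, int>& symbol_id,
                    std::map<int, std::string>& id_symbol, std::string const& name) {
        if (symbol_id.count(name) == 0) {
            int const id = static_cast<int>(symbol_id.size());  // next free id
            symbol_id[name] = id;
            id_symbol[id] = name;
        }
        assert(id_symbol.at(symbol_id.at(name)) == name);  // maps stay inverse
    }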
diff --git a/components/core/src/streaming_archive/writer/Archive.cpp b/components/core/src/streaming_archive/writer/Archive.cpp index 415d599e4..b8b900dca 100644 --- a/components/core/src/streaming_archive/writer/Archive.cpp +++ b/components/core/src/streaming_archive/writer/Archive.cpp @@ -18,13 +18,18 @@ // json #include +// Log surgeon +#include +#include + // Project headers -#include "../../compressor_frontend/LogParser.hpp" +#include "../../clp/utils.hpp" #include "../../EncodedVariableInterpreter.hpp" #include "../../spdlog_with_specializations.hpp" #include "../../Utils.hpp" #include "../Constants.hpp" +using log_surgeon::LogEventView; using std::list; using std::make_unique; using std::string; @@ -262,66 +267,74 @@ namespace streaming_archive::writer { update_segment_indices(logtype_id, var_ids); } - void Archive::write_msg_using_schema (compressor_frontend::Token*& uncompressed_msg, uint32_t uncompressed_msg_pos, const bool has_delimiter, - const bool has_timestamp) { + void Archive::write_msg_using_schema (LogEventView const& log_view) { epochtime_t timestamp = 0; TimestampPattern* timestamp_pattern = nullptr; - if (has_timestamp) { + auto const& log_output_buffer = log_view.get_log_output_buffer(); + if (log_output_buffer->has_timestamp()) { size_t start; size_t end; - timestamp_pattern = (TimestampPattern*) TimestampPattern::search_known_ts_patterns( - uncompressed_msg[0].get_string(), timestamp, start, end); - if (old_ts_pattern != *timestamp_pattern) { + timestamp_pattern = (TimestampPattern*)TimestampPattern::search_known_ts_patterns( + log_output_buffer->get_mutable_token(0).to_string(), + timestamp, + start, + end + ); + if (m_old_ts_pattern != timestamp_pattern) { change_ts_pattern(timestamp_pattern); - old_ts_pattern = *timestamp_pattern; + m_old_ts_pattern = timestamp_pattern; } - assert(nullptr != timestamp_pattern); } if (get_data_size_of_dictionaries() >= m_target_data_size_of_dicts) { clp::split_file_and_archive(m_archive_user_config, m_path_for_compression, m_group_id, timestamp_pattern, *this); } else if (m_file->get_encoded_size_in_bytes() >= m_target_encoded_file_size) { clp::split_file(m_path_for_compression, m_group_id, timestamp_pattern, *this); } - m_encoded_vars.clear(); m_var_ids.clear(); m_logtype_dict_entry.clear(); - size_t num_uncompressed_bytes = 0; // Timestamp is included in the uncompressed message size - uint32_t start_pos = uncompressed_msg[0].m_start_pos; + uint32_t start_pos = log_output_buffer->get_token(0).m_start_pos; if (timestamp_pattern == nullptr) { - start_pos = uncompressed_msg[1].m_start_pos; + start_pos = log_output_buffer->get_token(1).m_start_pos; } - uint32_t end_pos = uncompressed_msg[uncompressed_msg_pos - 1].m_end_pos; + uint32_t end_pos = log_output_buffer->get_token(log_output_buffer->pos() - 1).m_end_pos; if (start_pos <= end_pos) { num_uncompressed_bytes = end_pos - start_pos; } else { - num_uncompressed_bytes = *uncompressed_msg[0].m_buffer_size_ptr - start_pos + end_pos; - } - for (uint32_t i = 1; i < uncompressed_msg_pos; i++) { - compressor_frontend::Token& token = uncompressed_msg[i]; - int token_type = token.m_type_ids->at(0); - if (has_delimiter && token_type != (int) compressor_frontend::SymbolID::TokenUncaughtStringID && - token_type != (int) compressor_frontend::SymbolID::TokenNewlineId) { + num_uncompressed_bytes + = log_output_buffer->get_token(0).m_buffer_size - start_pos +
end_pos; + } + for (uint32_t i = 1; i < log_output_buffer->pos(); i++) { + log_surgeon::Token& token = log_output_buffer->get_mutable_token(i); + int token_type = token.m_type_ids_ptr->at(0); + if (log_output_buffer->has_delimiters() && (timestamp_pattern != nullptr || i > 1) + && token_type != static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID) + && token_type != static_cast<int>(log_surgeon::SymbolID::TokenNewlineId)) + { m_logtype_dict_entry.add_constant(token.get_delimiter(), 0, 1); - if (token.m_start_pos == *token.m_buffer_size_ptr - 1) { + if (token.m_start_pos == token.m_buffer_size - 1) { token.m_start_pos = 0; } else { token.m_start_pos++; } } switch (token_type) { - case (int) compressor_frontend::SymbolID::TokenNewlineId: - case (int) compressor_frontend::SymbolID::TokenUncaughtStringID: { - m_logtype_dict_entry.add_constant(token.get_string(), 0, token.get_length()); + case static_cast<int>(log_surgeon::SymbolID::TokenNewlineId): + case static_cast<int>(log_surgeon::SymbolID::TokenUncaughtStringID): { + m_logtype_dict_entry.add_constant(token.to_string(), 0, token.get_length()); break; } - case (int) compressor_frontend::SymbolID::TokenIntId: { + case static_cast<int>(log_surgeon::SymbolID::TokenIntId): { encoded_variable_t encoded_var; - if (!EncodedVariableInterpreter::convert_string_to_representable_integer_var(token.get_string(), encoded_var)) { + if (!EncodedVariableInterpreter::convert_string_to_representable_integer_var( + token.to_string(), + encoded_var + )) + { variable_dictionary_id_t id; - m_var_dict.add_entry(token.get_string(), id); + m_var_dict.add_entry(token.to_string(), id); encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id); m_logtype_dict_entry.add_dictionary_var(); } else { @@ -330,12 +343,15 @@ namespace streaming_archive::writer { m_encoded_vars.push_back(encoded_var); break; } - case (int) compressor_frontend::SymbolID::TokenFloatId: { + case static_cast<int>(log_surgeon::SymbolID::TokenFloatId): { encoded_variable_t encoded_var; if (!EncodedVariableInterpreter::convert_string_to_representable_float_var( - token.get_string(), encoded_var)) { + token.to_string(), + encoded_var + )) + { variable_dictionary_id_t id; - m_var_dict.add_entry(token.get_string(), id); + m_var_dict.add_entry(token.to_string(), id); encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id); m_logtype_dict_entry.add_dictionary_var(); } else { @@ -348,7 +364,7 @@ namespace streaming_archive::writer { // Variable string looks like a dictionary variable, so encode it as so encoded_variable_t encoded_var; variable_dictionary_id_t id; - m_var_dict.add_entry(token.get_string(), id); + m_var_dict.add_entry(token.to_string(), id); encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id); m_var_ids.push_back(id); diff --git a/components/core/src/streaming_archive/writer/Archive.hpp b/components/core/src/streaming_archive/writer/Archive.hpp index 64569a9f6..048081603 100644 --- a/components/core/src/streaming_archive/writer/Archive.hpp +++ b/components/core/src/streaming_archive/writer/Archive.hpp @@ -14,9 +14,12 @@ #include #include +// Log Surgeon +#include +#include + // Project headers #include "../../ArrayBackedPosIntSet.hpp" -#include "../../compressor_frontend/Token.hpp" #include "../../ErrorCode.hpp" #include "../../GlobalMetadataDB.hpp" #include "../../ir/LogEvent.hpp" @@ -62,8 +65,7 @@ namespace streaming_archive { namespace writer { } }; - TimestampPattern old_ts_pattern; - + TimestampPattern* m_old_ts_pattern; size_t m_target_data_size_of_dicts; UserConfig
m_archive_user_config; std::string m_path_for_compression; @@ -73,7 +75,7 @@ namespace streaming_archive { namespace writer { // Constructors Archive () : m_segments_dir_fd(-1), m_compression_level(0), m_global_metadata_db(nullptr), - old_ts_pattern(), m_schema_file_path() {} + m_old_ts_pattern(nullptr), m_schema_file_path() {} // Destructor ~Archive (); @@ -130,16 +132,13 @@ namespace streaming_archive { namespace writer { * @throw FileWriter::OperationFailed if any write fails */ void write_msg (epochtime_t timestamp, const std::string& message, size_t num_uncompressed_bytes); + /** * Encodes and writes a message to the given file using schema file - * @param file - * @param uncompressed_msg - * @param uncompressed_msg_pos - * @param has_delimiter - * @param has_timestamp + * @param log_event_view * @throw FileWriter::OperationFailed if any write fails */ - void write_msg_using_schema (compressor_frontend::Token*& uncompressed_msg, uint32_t uncompressed_msg_pos, bool has_delimiter, bool has_timestamp); + void write_msg_using_schema (log_surgeon::LogEventView const& log_event_view); /** * Writes an IR log event to the current encoded file diff --git a/components/core/submodules/log-surgeon b/components/core/submodules/log-surgeon new file mode 160000 index 000000000..895f46489 --- /dev/null +++ b/components/core/submodules/log-surgeon @@ -0,0 +1 @@ +Subproject commit 895f46489b1911ab3b3aac3202afd56c96e8cd98 diff --git a/components/core/tests/test-Grep.cpp b/components/core/tests/test-Grep.cpp index 783f5c4bd..96a855c82 100644 --- a/components/core/tests/test-Grep.cpp +++ b/components/core/tests/test-Grep.cpp @@ -4,25 +4,26 @@ // Catch2 #include +// Log Surgeon +#include +#include + // Project headers -#include "../src/compressor_frontend/Lexer.hpp" -#include "../src/compressor_frontend/SchemaParser.hpp" -#include "../src/compressor_frontend/utils.hpp" #include "../src/Grep.hpp" -using compressor_frontend::DelimiterStringAST; -using compressor_frontend::lexers::ByteLexer; -using compressor_frontend::ParserAST; -using compressor_frontend::SchemaFileAST; -using compressor_frontend::SchemaParser; -using compressor_frontend::SchemaVarAST; +using log_surgeon::DelimiterStringAST; +using log_surgeon::lexers::ByteLexer; +using log_surgeon::ParserAST; +using log_surgeon::SchemaAST; +using log_surgeon::SchemaParser; +using log_surgeon::SchemaVarAST; using std::string; TEST_CASE("get_bounds_of_next_potential_var", "[get_bounds_of_next_potential_var]") { ByteLexer forward_lexer; - compressor_frontend::load_lexer_from_file("../tests/test_schema_files/search_schema.txt", false, forward_lexer); + load_lexer_from_file("../tests/test_schema_files/search_schema.txt", false, forward_lexer); ByteLexer reverse_lexer; - compressor_frontend::load_lexer_from_file("../tests/test_schema_files/search_schema.txt", true, reverse_lexer); + load_lexer_from_file("../tests/test_schema_files/search_schema.txt", true, reverse_lexer); string str; size_t begin_pos; diff --git a/components/core/tests/test-ParserWithUserSchema.cpp b/components/core/tests/test-ParserWithUserSchema.cpp index ef11d30f5..b96fda3c4 100644 --- a/components/core/tests/test-ParserWithUserSchema.cpp +++ b/components/core/tests/test-ParserWithUserSchema.cpp @@ -1,4 +1,8 @@ +// TODO: move this test to log_surgeon +// TODO: move load_lexer_from_file into SearchParser in log_surgeon + // C libraries +#include #include // Boost libraries @@ -8,34 +12,34 @@ // Catch2 #include +// Log Surgeon +#include + // Project headers #include "../src/clp/run.hpp" 
-#include "../src/compressor_frontend/utils.hpp" -#include "../src/compressor_frontend/LogParser.hpp" #include "../src/GlobalMySQLMetadataDB.hpp" - -using compressor_frontend::DelimiterStringAST; -using compressor_frontend::LALR1Parser; -using compressor_frontend::lexers::ByteLexer; -using compressor_frontend::LogParser; -using compressor_frontend::ParserAST; -using compressor_frontend::SchemaFileAST; -using compressor_frontend::SchemaParser; -using compressor_frontend::SchemaVarAST; -using compressor_frontend::Token; - -std::unique_ptr<SchemaFileAST> generate_schema_ast(const std::string& schema_file) { +#include "../src/LogSurgeonReader.hpp" +#include "../src/Utils.hpp" + +using log_surgeon::DelimiterStringAST; +using log_surgeon::LALR1Parser; +using log_surgeon::lexers::ByteLexer; +using log_surgeon::LogParser; +using log_surgeon::ParserAST; +using log_surgeon::SchemaAST; +using log_surgeon::SchemaParser; +using log_surgeon::SchemaVarAST; +using log_surgeon::Token; + +std::unique_ptr<SchemaAST> generate_schema_ast(const std::string& schema_file) { SchemaParser schema_parser; - FileReader schema_file_reader; - schema_file_reader.open(schema_file); - REQUIRE(schema_file_reader.is_open()); - std::unique_ptr<SchemaFileAST> schema_ast = schema_parser.generate_schema_ast(schema_file_reader); + std::unique_ptr<SchemaAST> schema_ast = SchemaParser::try_schema_file(schema_file); REQUIRE(schema_ast.get() != nullptr); return schema_ast; } std::unique_ptr<LogParser> generate_log_parser(const std::string& schema_file) { - std::unique_ptr<SchemaFileAST> schema_ast = generate_schema_ast(schema_file); + std::unique_ptr<SchemaAST> schema_ast = generate_schema_ast(schema_file); std::unique_ptr<LogParser> log_parser = std::make_unique<LogParser>(schema_file); REQUIRE(log_parser.get() != nullptr); return log_parser; @@ -44,7 +48,7 @@ std::unique_ptr<LogParser> generate_log_parser(const std::string& schema_file) { void compress(const std::string& output_dir, const std::string& file_to_compress, std::string schema_file, bool old = false) { std::vector<std::string> arguments; if(old) { - arguments = {"main.cpp", "c", output_dir, file_to_compress}; + arguments = {"main.cpp", "c", output_dir, file_to_compress}; } else { arguments = {"main.cpp", "c", output_dir, file_to_compress, "--schema-path", std::move(schema_file)}; } @@ -68,32 +72,41 @@ void decompress(std::string archive_dir, std::string output_dir) { TEST_CASE("Test error for missing schema file", "[LALR1Parser][SchemaParser]") { std::string file_path = "../tests/test_schema_files/missing_schema.txt"; std::string file_name = boost::filesystem::weakly_canonical(file_path).string(); - REQUIRE_THROWS_WITH(generate_schema_ast(file_path), "File not found: " + file_name + "\n"); - SPDLOG_INFO("File not found: " + file_name + "\n"); + REQUIRE_THROWS_WITH( + generate_schema_ast(file_path), + "Failed to read '" + file_path + "', error_code=" + + std::to_string(static_cast<int>(log_surgeon::ErrorCode::FileNotFound)) ); } TEST_CASE("Test error for empty schema file", "[LALR1Parser][SchemaParser]") { std::string file_path = "../tests/test_schema_files/empty_schema.txt"; - std::string file_name = boost::filesystem::canonical(file_path).string(); - REQUIRE_THROWS_WITH(generate_schema_ast(file_path), file_name +":1:1: error: empty file\n" - +" \n" - +"^\n"); + REQUIRE_THROWS_WITH( + generate_schema_ast(file_path), + "Schema:1:1: error: empty file\n" + " \n" + "^\n" + ); } TEST_CASE("Test error for colon missing schema file", "[LALR1Parser][SchemaParser]") { std::string file_path = "../tests/test_schema_files/colon_missing_schema.txt"; std::string file_name =
boost::filesystem::canonical(file_path).string(); - REQUIRE_THROWS_WITH(generate_schema_ast(file_path), file_name +":3:4: error: expected ':','AlphaNumeric' before ' ' token\n" - +" int [0-9]+\n" - +" ^\n"); + REQUIRE_THROWS_WITH( + generate_schema_ast(file_path), + "Schema:3:4: error: expected ':','AlphaNumeric' before ' ' token\n" + " int [0-9]+\n" + " ^\n" + ); } TEST_CASE("Test error for multi-character tokens in schema file", "[LALR1Parser][SchemaParser]") { std::string file_path = "../tests/test_schema_files/schema_with_multicharacter_token_error.txt"; - std::string file_name = boost::filesystem::canonical(file_path).string(); - REQUIRE_THROWS_WITH(generate_schema_ast(file_path), file_name +":2:11: error: expected ':' before ' ' token\n" - +" delimiters : \\r\\n\n" - +" ^\n"); + REQUIRE_THROWS_WITH( + generate_schema_ast(file_path), + "Schema:2:11: error: expected ':' before ' ' token\n" + " delimiters : \\r\\n\n" + " ^\n" + ); } TEST_CASE("Test creating schema parser", "[LALR1Parser][SchemaParser]") { @@ -109,19 +122,26 @@ TEST_CASE("Test creating log parser without delimiters", "[LALR1Parser][LogParse "When using --schema-path, \"delimiters:\" line must be used."); } -TEST_CASE("Test error for creating log file with delimiter in regex pattern", "[LALR1Parser][SchemaParser]") { - std::string file_path = "../tests/test_schema_files/schema_with_delimiter_in_regex_error.txt"; - std::string file_name = boost::filesystem::canonical(file_path).string(); - REQUIRE_THROWS_WITH(generate_log_parser(file_path), file_name + ":2: error: 'equals' has regex pattern which contains delimiter '='.\n" - + " equals:.*=.*\n" - + " ^^^^^\n"); -} +// TODO: This test doesn't currently work because delimiters are allowed in +// schema files, and there is no option to disable this yet +//TEST_CASE("Test error for creating log file with delimiter in regex pattern", +// "[LALR1Parser]SchemaParser]") { +// std::string file_path = "../tests/test_schema_files/schema_with_delimiter_in_regex_error.txt"; +// std::string file_name = boost::filesystem::canonical(file_path).string(); +// REQUIRE_THROWS_WITH(generate_log_parser(file_path), +// file_name + +// ":2: error: 'equals' has regex pattern which contains delimiter '='.\n" +// + " equals:.*=.*\n" +// + " ^^^^^\n"); +//} -/// TODO: This error check is performed correctly by CLP, but it is handled by something different now so this test will fail as is +// TODO: This error check is performed correctly by CLP, but it is handled by +// something different now so this test will fail as is //TEST_CASE("Test error for missing log file", "[LALR1Parser][LogParser]") { // std::string file_name = "../tests/test_log_files/missing_log.txt"; // std::string file_path = boost::filesystem::weakly_canonical(file_name).string(); -// REQUIRE_THROWS(compress("../tests/test_archives", file_name, "../tests/test_schema_files/schema_that_does_not_exist.txt"), +// REQUIRE_THROWS(compress("../tests/test_archives", file_name, +// "../tests/test_schema_files/schema_that_does_not_exist.txt"), // "Specified schema file does not exist."); //} @@ -129,15 +149,21 @@ TEST_CASE("Test forward lexer", "[Search]") { ByteLexer forward_lexer; std::string schema_file_name = "../tests/test_schema_files/search_schema.txt"; std::string schema_file_path = boost::filesystem::weakly_canonical(schema_file_name).string(); - compressor_frontend::load_lexer_from_file(schema_file_path, false, forward_lexer); - FileReader reader; - reader.open("../tests/test_search_queries/easy.txt"); - forward_lexer.reset(reader); 
- Token token = forward_lexer.scan(); - while (token.m_type_ids->at(0) != (int)compressor_frontend::SymbolID::TokenEndID) { - SPDLOG_INFO("token:" + token.get_string() + "\n"); - SPDLOG_INFO("token.m_type_ids->back():" + forward_lexer.m_id_symbol[token.m_type_ids->back()] + "\n"); - token = forward_lexer.scan(); + load_lexer_from_file(schema_file_path, false, forward_lexer); + FileReader file_reader; + LogSurgeonReader reader_wrapper(file_reader); + file_reader.open("../tests/test_search_queries/easy.txt"); + log_surgeon::ParserInputBuffer parser_input_buffer; + parser_input_buffer.read_if_safe(reader_wrapper); + forward_lexer.reset(); + Token token; + auto error_code = forward_lexer.scan(parser_input_buffer, token); + REQUIRE(error_code == log_surgeon::ErrorCode::Success); + while (token.m_type_ids_ptr->at(0) != static_cast<int>(log_surgeon::SymbolID::TokenEndID)) { + SPDLOG_INFO("token:" + token.to_string() + "\n"); + SPDLOG_INFO("token.m_type_ids->back():" + forward_lexer.m_id_symbol[token.m_type_ids_ptr->back()] + "\n"); + error_code = forward_lexer.scan(parser_input_buffer, token); + REQUIRE(error_code == log_surgeon::ErrorCode::Success); } } @@ -145,14 +171,20 @@ TEST_CASE("Test reverse lexer", "[Search]") { ByteLexer reverse_lexer; std::string schema_file_name = "../tests/test_schema_files/search_schema.txt"; std::string schema_file_path = boost::filesystem::weakly_canonical(schema_file_name).string(); - compressor_frontend::load_lexer_from_file(schema_file_path, true, reverse_lexer); - FileReader reader; - reader.open("../tests/test_search_queries/easy.txt"); - reverse_lexer.reset(reader); - Token token = reverse_lexer.scan(); - while (token.m_type_ids->at(0) != (int)compressor_frontend::SymbolID::TokenEndID) { - SPDLOG_INFO("token:" + token.get_string() + "\n"); - SPDLOG_INFO("token.m_type_ids->back():" + reverse_lexer.m_id_symbol[token.m_type_ids->back()] + "\n"); - token = reverse_lexer.scan(); + load_lexer_from_file(schema_file_path, false, reverse_lexer); + FileReader file_reader; + LogSurgeonReader reader_wrapper(file_reader); + file_reader.open("../tests/test_search_queries/easy.txt"); + log_surgeon::ParserInputBuffer parser_input_buffer; + parser_input_buffer.read_if_safe(reader_wrapper); + reverse_lexer.reset(); + Token token; + auto error_code = reverse_lexer.scan(parser_input_buffer, token); + REQUIRE(error_code == log_surgeon::ErrorCode::Success); + while (token.m_type_ids_ptr->at(0) != static_cast<int>(log_surgeon::SymbolID::TokenEndID)) { + SPDLOG_INFO("token:" + token.to_string() + "\n"); + SPDLOG_INFO("token.m_type_ids->back():" + reverse_lexer.m_id_symbol[token.m_type_ids_ptr->back()] + "\n"); + error_code = reverse_lexer.scan(parser_input_buffer, token); + REQUIRE(error_code == log_surgeon::ErrorCode::Success); } } diff --git a/components/core/tests/test_log_files/log.txt b/components/core/tests/test_log_files/log.txt index 51309fc85..185e4723d 100644 --- a/components/core/tests/test_log_files/log.txt +++ b/components/core/tests/test_log_files/log.txt @@ -1,6 +1,7 @@ 2016-05-08 07:34:05.251 MyDog123 APet4123\test.txt 2016-05-08 07:34:05.252 statictext123 -2016-05-08 07:34:05.253 123 +2016-05-08 07:34:05.253 123 1.9 GB out of 4.2 GB data 2016-05-08 07:34:05.254 123.123 +is multiline 2016-05-08 07:34:05.255 Some Static Text Then MyDog123 APet4123\test.txt Then 123 then 123.123 -123123 relative timestamp \ No newline at end of file +123123 relative timestamp diff --git a/components/core/tests/test_schema_files/colon_missing_schema.txt
b/components/core/tests/test_schema_files/colon_missing_schema.txt index 0e063a696..d2c25cfbf 100644 --- a/components/core/tests/test_schema_files/colon_missing_schema.txt +++ b/components/core/tests/test_schema_files/colon_missing_schema.txt @@ -1,3 +1,3 @@ delimiters: -double:[0-9]+\.[0-9]+ +float:[0-9]+\.[0-9]+ int [0-9]+ \ No newline at end of file diff --git a/components/core/tests/test_schema_files/real_schema.txt b/components/core/tests/test_schema_files/real_schema.txt index 4a72dff29..3c2cb6e29 100644 --- a/components/core/tests/test_schema_files/real_schema.txt +++ b/components/core/tests/test_schema_files/real_schema.txt @@ -4,7 +4,7 @@ delimiters: \r\n // First set of variables timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}[,\.][0-9]{0,3} int:\-{0,1}[0-9]+ -double:\-{0,1}[0-9]+\.[0-9]+ +float:\-{0,1}[0-9]+\.[0-9]+ // Second set of variables hex:[a-fA-F]+ diff --git a/components/core/tests/test_schema_files/schema_with_delimiter_in_regex_error.txt b/components/core/tests/test_schema_files/schema_with_delimiter_in_regex_error.txt index 9bd2488c2..7491d1580 100644 --- a/components/core/tests/test_schema_files/schema_with_delimiter_in_regex_error.txt +++ b/components/core/tests/test_schema_files/schema_with_delimiter_in_regex_error.txt @@ -4,4 +4,4 @@ identifier:(My.og)\d{3}APet[0-9]*\\test\.txt timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3} serverName:(S|s)erver[0-9]* int:[0-9]+ -double:[0-9]+\.[0-9]+ \ No newline at end of file +float:[0-9]+\.[0-9]+ \ No newline at end of file diff --git a/components/core/tests/test_schema_files/schema_with_delimiters.txt b/components/core/tests/test_schema_files/schema_with_delimiters.txt index 0b0f9af9f..532dba9de 100644 --- a/components/core/tests/test_schema_files/schema_with_delimiters.txt +++ b/components/core/tests/test_schema_files/schema_with_delimiters.txt @@ -3,4 +3,4 @@ identifier:(My.og)\d{3}APet[0-9]*\\test\.txt timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3} serverName:(S|s)erver[0-9]* int:[0-9]+ -double:[0-9]+\.[0-9]+ \ No newline at end of file +float:[0-9]+\.[0-9]+ \ No newline at end of file diff --git a/components/core/tests/test_schema_files/schema_with_multicharacter_token_error.txt b/components/core/tests/test_schema_files/schema_with_multicharacter_token_error.txt index 5fa7f41ea..efe3fff1a 100644 --- a/components/core/tests/test_schema_files/schema_with_multicharacter_token_error.txt +++ b/components/core/tests/test_schema_files/schema_with_multicharacter_token_error.txt @@ -4,7 +4,7 @@ delimiters : \r\n // First set of variables timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3} int:\-{0,1}[0-9]+ -double:\-{0,1}[0-9]+\.[0-9]+ +float:\-{0,1}[0-9]+\.[0-9]+ // Second set of variables hex:[a-fA-F]+ diff --git a/components/core/tests/test_schema_files/schema_without_delimiters.txt b/components/core/tests/test_schema_files/schema_without_delimiters.txt index 7b25296d4..ea28b6142 100644 --- a/components/core/tests/test_schema_files/schema_without_delimiters.txt +++ b/components/core/tests/test_schema_files/schema_without_delimiters.txt @@ -2,4 +2,4 @@ identifier:(My.og)\d{3}\sAPet[0-9]*\\test\.txt timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3} serverName:(S|s)erver[0-9]* int:[0-9]+ -double:[0-9]+\.[0-9]+ \ No newline at end of file +float:[0-9]+\.[0-9]+ \ No newline at end of file diff --git a/components/core/tests/test_schema_files/search_schema.txt 
b/components/core/tests/test_schema_files/search_schema.txt index 73f11db6b..f49a6dbfa 100644 --- a/components/core/tests/test_schema_files/search_schema.txt +++ b/components/core/tests/test_schema_files/search_schema.txt @@ -4,7 +4,7 @@ delimiters: \r\n:,=!;%? // First set of variables timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{3}){0,1} int:\-{0,1}[0-9]+ -double:\-{0,1}[0-9]+\.[0-9]+ +float:\-{0,1}[0-9]+\.[0-9]+ // Second set of variables hex:[a-fA-F]+ diff --git a/components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile b/components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile index d93d575a8..fea78e668 100644 --- a/components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile +++ b/components/core/tools/docker-images/clp-env-base-centos7.4/Dockerfile @@ -13,8 +13,8 @@ RUN ./tools/scripts/lib_install/centos7.4/install-all.sh # Set PKG_CONFIG_PATH since CentOS doesn't look in /usr/local by default ENV PKG_CONFIG_PATH /usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig -# Enable gcc 9 in login shells and non-interactive non-login shells -RUN ln -s /opt/rh/devtoolset-9/enable /etc/profile.d/devtoolset.sh +# Enable gcc 10 in login shells and non-interactive non-login shells +RUN ln -s /opt/rh/devtoolset-10/enable /etc/profile.d/devtoolset.sh # Enable git 2.27 # NOTE: We use a script to enable the SCL git package on each git call because some Github actions diff --git a/components/core/tools/docker-images/clp-env-base-ubuntu-focal/Dockerfile b/components/core/tools/docker-images/clp-env-base-ubuntu-focal/Dockerfile index 794ad77c9..60c307818 100644 --- a/components/core/tools/docker-images/clp-env-base-ubuntu-focal/Dockerfile +++ b/components/core/tools/docker-images/clp-env-base-ubuntu-focal/Dockerfile @@ -7,6 +7,12 @@ ADD ./tools/scripts/lib_install ./tools/scripts/lib_install RUN ./tools/scripts/lib_install/ubuntu-focal/install-all.sh +# Set the compiler to gcc-10 +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 +RUN update-alternatives --set gcc /usr/bin/gcc-10 +RUN update-alternatives --set g++ /usr/bin/g++-10 + # Reset the working directory so that it's accessible by any user who runs the # container WORKDIR / diff --git a/components/core/tools/scripts/lib_install/centos7.4/README.md b/components/core/tools/scripts/lib_install/centos7.4/README.md index 0662e53aa..d529c0d03 100644 --- a/components/core/tools/scripts/lib_install/centos7.4/README.md +++ b/components/core/tools/scripts/lib_install/centos7.4/README.md @@ -17,10 +17,10 @@ will not install any dependencies you don't expect. # Setup dependencies -* Enable gcc 9 +* Enable gcc 10 ```bash - ln -s /opt/rh/devtoolset-9/enable /etc/profile.d/devtoolset.sh + ln -s /opt/rh/devtoolset-10/enable /etc/profile.d/devtoolset.sh ``` * Set PKG_CONFIG_PATH since CentOS doesn't look in `/usr/local` by default. 
diff --git a/components/core/tools/scripts/lib_install/centos7.4/install-packages-from-source.sh b/components/core/tools/scripts/lib_install/centos7.4/install-packages-from-source.sh index 2c911912d..daeef06be 100755 --- a/components/core/tools/scripts/lib_install/centos7.4/install-packages-from-source.sh +++ b/components/core/tools/scripts/lib_install/centos7.4/install-packages-from-source.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -# Enable gcc 9 -source /opt/rh/devtoolset-9/enable +# Enable gcc 10 +source /opt/rh/devtoolset-10/enable # NOTE: cmake and boost must be installed first since the remaining packages depend on them ./tools/scripts/lib_install/install-cmake.sh 3.21.2 diff --git a/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh index aab2e8168..e9398083b 100755 --- a/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh @@ -14,5 +14,5 @@ yum install -y \ # Install packages from CentOS' software collections repository (centos-release-scl) yum install -y \ - devtoolset-9 \ + devtoolset-10 \ rh-git227 diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh index 67e165d76..4ee5a0359 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh @@ -8,6 +8,8 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y \ curl \ build-essential \ git \ + g++-10 \ + gcc-10 \ libboost-filesystem-dev \ libboost-iostreams-dev \ libboost-program-options-dev \ diff --git a/components/package-template/src/etc/clp-schema.template.txt b/components/package-template/src/etc/clp-schema.template.txt index d1d480308..f026b5612 100644 --- a/components/package-template/src/etc/clp-schema.template.txt +++ b/components/package-template/src/etc/clp-schema.template.txt @@ -49,7 +49,7 @@ timestamp:\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}.\d{6} // Specially-encoded variables (using the `int` and `double` keywords) int:\-{0,1}[0-9]+ -double:\-{0,1}[0-9]+\.[0-9]+ +float:\-{0,1}[0-9]+\.[0-9]+ // Dictionary variables hex:[a-fA-F]+
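Note: taken together, the test changes above define the new scanning pattern against log-surgeon: wrap CLP's FileReader in a LogSurgeonReader, fill a ParserInputBuffer, then scan token by token until TokenEndID. A sketch assembled only from calls appearing in this patch (include paths illustrative; error handling elided):

    #include <string>

    // Illustrative include paths; the exact log-surgeon headers are elided above.
    #include "../src/FileReader.hpp"
    #include "../src/LogSurgeonReader.hpp"
    #include "../src/Utils.hpp"

    using log_surgeon::Token;
    using log_surgeon::lexers::ByteLexer;

    void scan_file(std::string const& schema_path, std::string const& input_path) {
        ByteLexer lexer;
        load_lexer_from_file(schema_path, false, lexer);

        FileReader file_reader;
        LogSurgeonReader reader_wrapper(file_reader);
        file_reader.open(input_path);

        log_surgeon::ParserInputBuffer parser_input_buffer;
        parser_input_buffer.read_if_safe(reader_wrapper);
        lexer.reset();

        Token token;
        auto error_code = lexer.scan(parser_input_buffer, token);
        while (log_surgeon::ErrorCode::Success == error_code
               && token.m_type_ids_ptr->at(0)
                          != static_cast<int>(log_surgeon::SymbolID::TokenEndID)) {
            // token.to_string() yields the matched text
            error_code = lexer.scan(parser_input_buffer, token);
        }
    }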