From 12a5f8dfcb6cd7d83d9bc0b12cafed0efd6ae9fb Mon Sep 17 00:00:00 2001
From: Devin Gibson <gibber9809@users.noreply.github.com>
Date: Tue, 19 Nov 2024 11:02:58 -0500
Subject: [PATCH] feat(clp-s): Record log-order at compression time. (#584)

Co-authored-by: wraymo <37269683+wraymo@users.noreply.github.com>
---
 components/core/src/clp_s/ArchiveReader.cpp   |  9 +++
 components/core/src/clp_s/ArchiveReader.hpp   |  6 ++
 components/core/src/clp_s/ArchiveWriter.cpp   |  2 +
 components/core/src/clp_s/ArchiveWriter.hpp   |  9 ++-
 .../core/src/clp_s/CommandLineArguments.cpp   |  8 ++-
 .../core/src/clp_s/CommandLineArguments.hpp   |  3 +
 components/core/src/clp_s/JsonConstructor.cpp | 28 +++++---
 components/core/src/clp_s/JsonConstructor.hpp |  2 +-
 components/core/src/clp_s/JsonParser.cpp      | 34 ++++++++-
 components/core/src/clp_s/JsonParser.hpp      | 23 +++++--
 components/core/src/clp_s/JsonSerializer.hpp  |  7 +-
 components/core/src/clp_s/ReaderUtils.cpp     |  2 +-
 components/core/src/clp_s/SchemaReader.cpp    | 23 +++++--
 components/core/src/clp_s/SchemaReader.hpp    | 21 +++++-
 components/core/src/clp_s/SchemaTree.cpp      | 39 ++++++++---
 components/core/src/clp_s/SchemaTree.hpp      | 69 ++++++++++++++++---
 .../core/src/clp_s/archive_constants.hpp      | 16 +++++
 components/core/src/clp_s/clp-s.cpp           |  1 +
 .../src/clp_s/search/ColumnDescriptor.hpp     |  3 +-
 components/core/src/clp_s/search/Output.cpp   | 28 +++++++-
 components/core/src/clp_s/search/Output.hpp   |  7 ++
 .../core/src/clp_s/search/OutputHandler.cpp   | 55 +++++++++++----
 .../core/src/clp_s/search/OutputHandler.hpp   | 62 ++++++++++++-----
 .../core/src/clp_s/search/Projection.cpp      |  2 +-
 .../core/src/clp_s/search/SchemaMatch.cpp     |  8 +--
 .../core/src/clp_s/search/SearchUtils.cpp     |  1 +
 26 files changed, 377 insertions(+), 91 deletions(-)

diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
index 5362d32cc..7c68b301d 100644
--- a/components/core/src/clp_s/ArchiveReader.cpp
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -27,6 +27,8 @@ void ArchiveReader::open(string_view archives_dir, string_view archive_id) {
     m_schema_tree = ReaderUtils::read_schema_tree(archive_path_str);
     m_schema_map = ReaderUtils::read_schemas(archive_path_str);
 
+    m_log_event_idx_column_id = m_schema_tree->get_metadata_field_id(constants::cLogEventIdxName);
+
     m_table_metadata_file_reader.open(archive_path_str + constants::cArchiveTableMetadataFile);
     m_stream_reader.open_packed_streams(archive_path_str + constants::cArchiveTablesFile);
 }
@@ -310,6 +312,12 @@ void ArchiveReader::initialize_schema_reader(
         }
         BaseColumnReader* column_reader = append_reader_column(reader, column_id);
 
+        if (column_id == m_log_event_idx_column_id
+            && nullptr != dynamic_cast<Int64ColumnReader*>(column_reader))
+        {
+            reader.mark_column_as_log_event_idx(static_cast<Int64ColumnReader*>(column_reader));
+        }
+
         if (should_extract_timestamp && column_reader && timestamp_column_ids.count(column_id) > 0)
         {
             reader.mark_column_as_timestamp(column_reader);
@@ -346,6 +354,7 @@ void ArchiveReader::close() {
     m_cur_stream_id = 0;
     m_stream_buffer.reset();
     m_stream_buffer_size = 0ULL;
+    m_log_event_idx_column_id = -1;
 }
 
 std::shared_ptr<char[]> ArchiveReader::read_stream(size_t stream_id, bool reuse_buffer) {
diff --git a/components/core/src/clp_s/ArchiveReader.hpp b/components/core/src/clp_s/ArchiveReader.hpp
index 41073ec84..6b437dfd2 100644
--- a/components/core/src/clp_s/ArchiveReader.hpp
+++ b/components/core/src/clp_s/ArchiveReader.hpp
@@ -142,6 +142,11 @@ class ArchiveReader {
         m_projection = projection;
     }
 
+    /**
+     * @return true if this archive has log ordering information, and false otherwise.
+     */
+    bool has_log_order() { return m_log_event_idx_column_id >= 0; }
+
 private:
     /**
      * Initializes a schema reader passed by reference to become a reader for a given schema.
@@ -214,6 +219,7 @@ class ArchiveReader {
     std::shared_ptr<char[]> m_stream_buffer{};
     size_t m_stream_buffer_size{0ULL};
     size_t m_cur_stream_id{0ULL};
+    int32_t m_log_event_idx_column_id{-1};
 };
 }  // namespace clp_s
 
diff --git a/components/core/src/clp_s/ArchiveWriter.cpp b/components/core/src/clp_s/ArchiveWriter.cpp
index 369fd79d2..7118ce88b 100644
--- a/components/core/src/clp_s/ArchiveWriter.cpp
+++ b/components/core/src/clp_s/ArchiveWriter.cpp
@@ -68,6 +68,7 @@ void ArchiveWriter::close() {
     m_encoded_message_size = 0UL;
     m_uncompressed_size = 0UL;
     m_compressed_size = 0UL;
+    m_next_log_event_id = 0;
 }
 
 void ArchiveWriter::append_message(
@@ -86,6 +87,7 @@ void ArchiveWriter::append_message(
     }
 
     m_encoded_message_size += schema_writer->append_message(message);
+    ++m_next_log_event_id;
 }
 
 size_t ArchiveWriter::get_data_size() {
diff --git a/components/core/src/clp_s/ArchiveWriter.hpp b/components/core/src/clp_s/ArchiveWriter.hpp
index 7edfe4491..87e9d11e5 100644
--- a/components/core/src/clp_s/ArchiveWriter.hpp
+++ b/components/core/src/clp_s/ArchiveWriter.hpp
@@ -1,6 +1,7 @@
 #ifndef CLP_S_ARCHIVEWRITER_HPP
 #define CLP_S_ARCHIVEWRITER_HPP
 
+#include <string_view>
 #include <utility>
 
 #include <boost/filesystem.hpp>
@@ -93,10 +94,15 @@ class ArchiveWriter {
      * @param key
      * @return the node id
      */
-    int32_t add_node(int parent_node_id, NodeType type, std::string const& key) {
+    int32_t add_node(int parent_node_id, NodeType type, std::string_view const key) {
         return m_schema_tree.add_node(parent_node_id, type, key);
     }
 
+    /**
+     * @return The Id that will be assigned to the next log event when appended to the archive.
+     */
+    int64_t get_next_log_event_id() const { return m_next_log_event_id; }
+
     /**
      * Return a schema's Id and add the schema to the
      * schema map if it does not already exist.
@@ -174,6 +180,7 @@ class ArchiveWriter {
     size_t m_encoded_message_size{};
     size_t m_uncompressed_size{};
     size_t m_compressed_size{};
+    int64_t m_next_log_event_id{};
 
     std::string m_id;
 
diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp
index cf69a066c..ace505788 100644
--- a/components/core/src/clp_s/CommandLineArguments.cpp
+++ b/components/core/src/clp_s/CommandLineArguments.cpp
@@ -194,6 +194,10 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
                     "structurize-arrays",
                     po::bool_switch(&m_structurize_arrays),
                     "Structurize arrays instead of compressing them as clp strings."
+            )(
+                    "disable-log-order",
+                    po::bool_switch(&m_disable_log_order),
+                    "Do not record log order at ingestion time."
             );
             // clang-format on
 
@@ -296,13 +300,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
             decompression_options.add_options()(
                     "ordered",
                     po::bool_switch(&m_ordered_decompression),
-                    "Enable decompression in ascending timestamp order for this archive"
+                    "Enable decompression in log order for this archive"
             )(
                     "ordered-chunk-size",
                     po::value<size_t>(&m_ordered_chunk_size)
                             ->default_value(m_ordered_chunk_size),
                     "Number of records to include in each output file when decompressing records "
-                    "in ascending timestamp order"
+                    "in log order"
             );
             // clang-format on
             extraction_options.add(decompression_options);
diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp
index 798e42728..8f2d79d8f 100644
--- a/components/core/src/clp_s/CommandLineArguments.hpp
+++ b/components/core/src/clp_s/CommandLineArguments.hpp
@@ -112,6 +112,8 @@ class CommandLineArguments {
 
     std::vector<std::string> const& get_projection_columns() const { return m_projection_columns; }
 
+    bool get_record_log_order() const { return false == m_disable_log_order; }
+
 private:
     // Methods
     /**
@@ -178,6 +180,7 @@ class CommandLineArguments {
     bool m_ordered_decompression{false};
     size_t m_ordered_chunk_size{0};
     size_t m_minimum_table_size{1ULL * 1024 * 1024};  // 1 MB
+    bool m_disable_log_order{false};
 
     // Metadata db variables
     std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
diff --git a/components/core/src/clp_s/JsonConstructor.cpp b/components/core/src/clp_s/JsonConstructor.cpp
index 90887f1e5..0c816c5e3 100644
--- a/components/core/src/clp_s/JsonConstructor.cpp
+++ b/components/core/src/clp_s/JsonConstructor.cpp
@@ -48,7 +48,13 @@ void JsonConstructor::store() {
     m_archive_reader = std::make_unique<ArchiveReader>();
     m_archive_reader->open(m_option.archives_dir, m_option.archive_id);
     m_archive_reader->read_dictionaries_and_metadata();
-    if (false == m_option.ordered) {
+
+    if (m_option.ordered && false == m_archive_reader->has_log_order()) {
+        SPDLOG_WARN("This archive is missing ordering information and can not be decompressed in "
+                    "log order. Falling back to out of order decompression.");
+    }
+
+    if (false == m_option.ordered || false == m_archive_reader->has_log_order()) {
         FileWriter writer;
         writer.open(
                 m_option.output_dir + "/original",
@@ -68,15 +74,15 @@ void JsonConstructor::construct_in_order() {
     auto tables = m_archive_reader->read_all_tables();
     using ReaderPointer = std::shared_ptr<SchemaReader>;
     auto cmp = [](ReaderPointer& left, ReaderPointer& right) {
-        return left->get_next_timestamp() > right->get_next_timestamp();
+        return left->get_next_log_event_idx() > right->get_next_log_event_idx();
     };
     std::priority_queue record_queue(tables.begin(), tables.end(), cmp);
     // Clear tables vector so that memory gets deallocated after we have marshalled all records for
     // a given table
     tables.clear();
 
-    epochtime_t first_timestamp{0};
-    epochtime_t last_timestamp{0};
+    int64_t first_idx{0};
+    int64_t last_idx{0};
     size_t num_records_marshalled{0};
     auto src_path = std::filesystem::path(m_option.output_dir) / m_option.archive_id;
     FileWriter writer;
@@ -97,9 +103,11 @@ void JsonConstructor::construct_in_order() {
 
     std::vector<bsoncxx::document::value> results;
     auto finalize_chunk = [&](bool open_new_writer) {
+        // Add one to last_idx to match clp's behaviour of having the end index be exclusive
+        ++last_idx;
         writer.close();
-        std::string new_file_name = src_path.string() + "_" + std::to_string(first_timestamp) + "_"
-                                    + std::to_string(last_timestamp) + ".jsonl";
+        std::string new_file_name = src_path.string() + "_" + std::to_string(first_idx) + "_"
+                                    + std::to_string(last_idx) + ".jsonl";
         auto new_file_path = std::filesystem::path(new_file_name);
         std::error_code ec;
         std::filesystem::rename(src_path, new_file_path, ec);
@@ -119,11 +127,11 @@ void JsonConstructor::construct_in_order() {
                     ),
                     bsoncxx::builder::basic::kvp(
                             constants::results_cache::decompression::cBeginMsgIx,
-                            static_cast<int64_t>(first_timestamp)
+                            first_idx
                     ),
                     bsoncxx::builder::basic::kvp(
                             constants::results_cache::decompression::cEndMsgIx,
-                            static_cast<int64_t>(last_timestamp)
+                            last_idx
                     ),
                     bsoncxx::builder::basic::kvp(
                             constants::results_cache::decompression::cIsLastIrChunk,
@@ -140,9 +148,9 @@ void JsonConstructor::construct_in_order() {
     while (false == record_queue.empty()) {
         ReaderPointer next = record_queue.top();
         record_queue.pop();
-        last_timestamp = next->get_next_timestamp();
+        last_idx = next->get_next_log_event_idx();
         if (0 == num_records_marshalled) {
-            first_timestamp = last_timestamp;
+            first_idx = last_idx;
         }
         next->get_next_message(buffer);
         if (false == next->done()) {
diff --git a/components/core/src/clp_s/JsonConstructor.hpp b/components/core/src/clp_s/JsonConstructor.hpp
index f1f71f9d8..c38e6d00b 100644
--- a/components/core/src/clp_s/JsonConstructor.hpp
+++ b/components/core/src/clp_s/JsonConstructor.hpp
@@ -66,7 +66,7 @@ class JsonConstructor {
 private:
     /**
      * Reads all of the tables from m_archive_reader and writes all of the records
-     * they contain to writer in timestamp order.
+     * they contain to writer in log order.
      */
     void construct_in_order();
 
diff --git a/components/core/src/clp_s/JsonParser.cpp b/components/core/src/clp_s/JsonParser.cpp
index 5336c367a..9e8293510 100644
--- a/components/core/src/clp_s/JsonParser.cpp
+++ b/components/core/src/clp_s/JsonParser.cpp
@@ -15,7 +15,8 @@ JsonParser::JsonParser(JsonParserOption const& option)
           m_target_encoded_size(option.target_encoded_size),
           m_max_document_size(option.max_document_size),
           m_timestamp_key(option.timestamp_key),
-          m_structurize_arrays(option.structurize_arrays) {
+          m_structurize_arrays(option.structurize_arrays),
+          m_record_log_order(option.record_log_order) {
     if (false == FileUtils::validate_path(option.file_paths)) {
         exit(1);
     }
@@ -447,6 +448,16 @@ bool JsonParser::parse() {
         m_num_messages = 0;
         size_t bytes_consumed_up_to_prev_archive = 0;
         size_t bytes_consumed_up_to_prev_record = 0;
+
+        int32_t log_event_idx_node_id{};
+        auto add_log_event_idx_node = [&]() {
+            if (m_record_log_order) {
+                log_event_idx_node_id
+                        = add_metadata_field(constants::cLogEventIdxName, NodeType::Integer);
+            }
+        };
+        add_log_event_idx_node();
+
         while (json_file_iterator.get_json(json_it)) {
             m_current_schema.clear();
 
@@ -467,11 +478,20 @@ bool JsonParser::parse() {
                 return false;
             }
 
+            // Add log_event_idx field to metadata for record
+            if (m_record_log_order) {
+                m_current_parsed_message.add_value(
+                        log_event_idx_node_id,
+                        m_archive_writer->get_next_log_event_id()
+                );
+                m_current_schema.insert_ordered(log_event_idx_node_id);
+            }
+
             // Some errors from simdjson are latent until trying to access invalid JSON fields.
             // Instead of checking for an error every time we access a JSON field in parse_line we
             // just catch simdjson_error here instead.
             try {
-                parse_line(ref.value(), -1, "");
+                parse_line(ref.value(), constants::cRootNodeId, constants::cRootNodeName);
             } catch (simdjson::simdjson_error& error) {
                 SPDLOG_ERROR(
                         "Encountered error - {} - while trying to parse {} after parsing {} bytes",
@@ -496,6 +516,7 @@ bool JsonParser::parse() {
                 );
                 bytes_consumed_up_to_prev_archive = bytes_consumed_up_to_prev_record;
                 split_archive();
+                add_log_event_idx_node();
             }
 
             m_current_parsed_message.clear();
@@ -526,6 +547,15 @@ bool JsonParser::parse() {
     return true;
 }
 
+int32_t JsonParser::add_metadata_field(std::string_view const field_name, NodeType type) {
+    auto metadata_subtree_id = m_archive_writer->add_node(
+            constants::cRootNodeId,
+            NodeType::Metadata,
+            constants::cMetadataSubtreeName
+    );
+    return m_archive_writer->add_node(metadata_subtree_id, type, field_name);
+}
+
 void JsonParser::store() {
     m_archive_writer->close();
 }
diff --git a/components/core/src/clp_s/JsonParser.hpp b/components/core/src/clp_s/JsonParser.hpp
index af6b024ef..d7cc5a2fe 100644
--- a/components/core/src/clp_s/JsonParser.hpp
+++ b/components/core/src/clp_s/JsonParser.hpp
@@ -3,6 +3,7 @@
 
 #include <map>
 #include <string>
+#include <string_view>
 #include <variant>
 #include <vector>
 
@@ -30,12 +31,13 @@ struct JsonParserOption {
     std::vector<std::string> file_paths;
     std::string timestamp_key;
     std::string archives_dir;
-    size_t target_encoded_size;
-    size_t max_document_size;
-    size_t min_table_size;
-    int compression_level;
-    bool print_archive_stats;
-    bool structurize_arrays;
+    size_t target_encoded_size{};
+    size_t max_document_size{};
+    size_t min_table_size{};
+    int compression_level{};
+    bool print_archive_stats{};
+    bool structurize_arrays{};
+    bool record_log_order{true};
     std::shared_ptr<clp::GlobalMySQLMetadataDB> metadata_db;
 };
 
@@ -94,6 +96,14 @@ class JsonParser {
      */
     void split_archive();
 
+    /**
+     * Adds an internal field to the MPT and get its Id.
+     *
+     * Note: this method should be called before parsing a record so that internal fields come first
+     * in each table. This isn't strictly necessary, but it is a nice convention.
+     */
+    int32_t add_metadata_field(std::string_view const field_name, NodeType type);
+
     int m_num_messages;
     std::vector<std::string> m_file_paths;
 
@@ -109,6 +119,7 @@ class JsonParser {
     size_t m_target_encoded_size;
     size_t m_max_document_size;
     bool m_structurize_arrays{false};
+    bool m_record_log_order{true};
 };
 }  // namespace clp_s
 
diff --git a/components/core/src/clp_s/JsonSerializer.hpp b/components/core/src/clp_s/JsonSerializer.hpp
index ff46dfa24..63f845525 100644
--- a/components/core/src/clp_s/JsonSerializer.hpp
+++ b/components/core/src/clp_s/JsonSerializer.hpp
@@ -2,6 +2,7 @@
 #define CLP_S_JSONSERIALIZER_HPP
 
 #include <string>
+#include <string_view>
 #include <vector>
 
 #include "ColumnReader.hpp"
@@ -66,7 +67,7 @@ class JsonSerializer {
         return false;
     }
 
-    void add_special_key(std::string const& key) { m_special_keys.push_back(key); }
+    void add_special_key(std::string_view const key) { m_special_keys.emplace_back(key); }
 
     void begin_object() {
         append_key();
@@ -110,13 +111,13 @@ class JsonSerializer {
 
     void append_key() { append_key(m_special_keys[m_special_keys_index++]); }
 
-    void append_key(std::string const& key) {
+    void append_key(std::string_view const key) {
         m_json_string += "\"";
         m_json_string += key;
         m_json_string += "\":";
     }
 
-    void append_value(std::string const& value) {
+    void append_value(std::string_view const value) {
         m_json_string += value;
         m_json_string += ",";
     }
diff --git a/components/core/src/clp_s/ReaderUtils.cpp b/components/core/src/clp_s/ReaderUtils.cpp
index 11583a60d..a2ab5a34a 100644
--- a/components/core/src/clp_s/ReaderUtils.cpp
+++ b/components/core/src/clp_s/ReaderUtils.cpp
@@ -18,10 +18,10 @@ std::shared_ptr<SchemaTree> ReaderUtils::read_schema_tree(std::string const& arc
         throw OperationFailed(error_code, __FILENAME__, __LINE__);
     }
 
+    std::string key;
     for (size_t i = 0; i < num_nodes; i++) {
         int32_t parent_id;
         size_t key_length;
-        std::string key;
         uint8_t node_type;
 
         error_code = schema_tree_decompressor.try_read_numeric_value(parent_id);
diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
index 35c1d0a6e..8120ab323 100644
--- a/components/core/src/clp_s/SchemaReader.cpp
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -37,6 +37,13 @@ void SchemaReader::mark_column_as_timestamp(BaseColumnReader* column_reader) {
     }
 }
 
+int64_t SchemaReader::get_next_log_event_idx() const {
+    if (nullptr != m_log_event_idx_column) {
+        return std::get<int64_t>(m_log_event_idx_column->extract_value(m_cur_message));
+    }
+    return 0;
+}
+
 void SchemaReader::load(
         std::shared_ptr<char[]> stream_buffer,
         size_t offset,
@@ -86,7 +93,7 @@ void SchemaReader::generate_json_string() {
             }
             case JsonSerializer::Op::AddIntField: {
                 column = m_reordered_columns[column_id_index++];
-                auto const& name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
+                auto name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
                 m_json_serializer.append_key(name);
                 m_json_serializer.append_value(
                         std::to_string(std::get<int64_t>(column->extract_value(m_cur_message)))
@@ -102,7 +109,7 @@ void SchemaReader::generate_json_string() {
             }
             case JsonSerializer::Op::AddFloatField: {
                 column = m_reordered_columns[column_id_index++];
-                auto const& name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
+                auto name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
                 m_json_serializer.append_key(name);
                 m_json_serializer.append_value(
                         std::to_string(std::get<double>(column->extract_value(m_cur_message)))
@@ -118,7 +125,7 @@ void SchemaReader::generate_json_string() {
             }
             case JsonSerializer::Op::AddBoolField: {
                 column = m_reordered_columns[column_id_index++];
-                auto const& name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
+                auto name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
                 m_json_serializer.append_key(name);
                 m_json_serializer.append_value(
                         std::get<uint8_t>(column->extract_value(m_cur_message)) != 0 ? "true"
@@ -136,7 +143,7 @@ void SchemaReader::generate_json_string() {
             }
             case JsonSerializer::Op::AddStringField: {
                 column = m_reordered_columns[column_id_index++];
-                auto const& name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
+                auto name = m_global_schema_tree->get_node(column->get_id()).get_key_name();
                 m_json_serializer.append_key(name);
                 m_json_serializer.append_value_from_column_with_quotes(column, m_cur_message);
                 break;
@@ -215,9 +222,10 @@ bool SchemaReader::get_next_message(std::string& message, FilterClass* filter) {
     return false;
 }
 
-bool SchemaReader::get_next_message_with_timestamp(
+bool SchemaReader::get_next_message_with_metadata(
         std::string& message,
         epochtime_t& timestamp,
+        int64_t& log_event_idx,
         FilterClass* filter
 ) {
     // TODO: If we already get max_num_results messages, we can skip messages
@@ -241,6 +249,7 @@ bool SchemaReader::get_next_message_with_timestamp(
         }
 
         timestamp = m_get_timestamp();
+        log_event_idx = get_next_log_event_idx();
 
         m_cur_message++;
         return true;
@@ -561,7 +570,7 @@ void SchemaReader::initialize_serializer() {
     // TODO: this code will have to change once we allow mixing log lines parsed by different
     // parsers.
     if (false == m_local_schema_tree.get_nodes().empty()) {
-        generate_json_template(m_local_schema_tree.get_root_node_id());
+        generate_json_template(m_local_schema_tree.get_object_subtree_node_id());
     }
 }
 
@@ -572,7 +581,7 @@ void SchemaReader::generate_json_template(int32_t id) {
     for (int32_t child_id : children_ids) {
         int32_t child_global_id = m_local_id_to_global_id[child_id];
         auto const& child_node = m_local_schema_tree.get_node(child_id);
-        std::string const& key = child_node.get_key_name();
+        auto key = child_node.get_key_name();
         switch (child_node.get_type()) {
             case NodeType::Object: {
                 m_json_serializer.add_op(JsonSerializer::Op::BeginObject);
diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
index 08651cc39..75f9ff117 100644
--- a/components/core/src/clp_s/SchemaReader.hpp
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -99,6 +99,7 @@ class SchemaReader {
         m_reordered_columns.clear();
         m_timestamp_column = nullptr;
         m_get_timestamp = []() -> epochtime_t { return 0; };
+        m_log_event_idx_column = nullptr;
         m_local_id_to_global_id.clear();
         m_global_id_to_local_id.clear();
         m_global_id_to_unordered_object.clear();
@@ -159,15 +160,17 @@ class SchemaReader {
     bool get_next_message(std::string& message, FilterClass* filter);
 
     /**
-     * Gets the next message matching a filter, and its timestamp
+     * Gets the next message matching a filter as well as its timestamp and log event index.
      * @param message
      * @param timestamp
+     * @param log_event_idx
      * @param filter
      * @return true if there is a next message
      */
-    bool get_next_message_with_timestamp(
+    bool get_next_message_with_metadata(
             std::string& message,
             epochtime_t& timestamp,
+            int64_t& log_event_idx,
             FilterClass* filter
     );
 
@@ -183,6 +186,13 @@ class SchemaReader {
      */
     void mark_column_as_timestamp(BaseColumnReader* column_reader);
 
+    /**
+     * Marks a column as the log_event_idx column.
+     */
+    void mark_column_as_log_event_idx(Int64ColumnReader* column_reader) {
+        m_log_event_idx_column = column_reader;
+    }
+
     int32_t get_schema_id() const { return m_schema_id; }
 
     /**
@@ -197,6 +207,12 @@ class SchemaReader {
      */
     epochtime_t get_next_timestamp() const { return m_get_timestamp(); }
 
+    /**
+     * @return the log_event_idx in the row pointed to by m_cur_message or 0 if there is no
+     * log_event_idx in this table.
+     */
+    int64_t get_next_log_event_idx() const;
+
     /**
      * @return true if all records in this table have been iterated over, false otherwise
      */
@@ -288,6 +304,7 @@ class SchemaReader {
 
     BaseColumnReader* m_timestamp_column;
     std::function<epochtime_t()> m_get_timestamp;
+    Int64ColumnReader* m_log_event_idx_column{nullptr};
 
     std::shared_ptr<SchemaTree> m_global_schema_tree;
     SchemaTree m_local_schema_tree;
diff --git a/components/core/src/clp_s/SchemaTree.cpp b/components/core/src/clp_s/SchemaTree.cpp
index e56168a2c..4408efc01 100644
--- a/components/core/src/clp_s/SchemaTree.cpp
+++ b/components/core/src/clp_s/SchemaTree.cpp
@@ -5,9 +5,8 @@
 #include "ZstdCompressor.hpp"
 
 namespace clp_s {
-int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string const& key) {
-    std::tuple<int32_t, std::string, NodeType> node_key = {parent_node_id, key, type};
-    auto node_it = m_node_map.find(node_key);
+int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string_view const key) {
+    auto node_it = m_node_map.find({parent_node_id, key, type});
     if (node_it != m_node_map.end()) {
         auto node_id = node_it->second;
         m_nodes[node_id].increase_count();
@@ -15,18 +14,42 @@ int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string
     }
 
     int32_t node_id = m_nodes.size();
-    auto& node = m_nodes.emplace_back(parent_node_id, node_id, key, type, 0);
+    auto& node = m_nodes.emplace_back(parent_node_id, node_id, std::string{key}, type, 0);
     node.increase_count();
-    if (parent_node_id >= 0) {
+    if (constants::cRootNodeId == parent_node_id) {
+        if (NodeType::Object == type) {
+            m_object_subtree_id = node_id;
+        } else if (NodeType::Metadata == type) {
+            m_metadata_subtree_id = node_id;
+        }
+    }
+
+    if (constants::cRootNodeId != parent_node_id) {
         auto& parent_node = m_nodes[parent_node_id];
         node.set_depth(parent_node.get_depth() + 1);
         parent_node.add_child(node_id);
     }
-    m_node_map[node_key] = node_id;
+    m_node_map.emplace(std::make_tuple(parent_node_id, node.get_key_name(), type), node_id);
 
     return node_id;
 }
 
+int32_t SchemaTree::get_metadata_field_id(std::string_view const field_name) {
+    if (m_metadata_subtree_id < 0) {
+        return -1;
+    }
+
+    auto& metadata_subtree_node = m_nodes[m_metadata_subtree_id];
+    for (auto child_id : metadata_subtree_node.get_children_ids()) {
+        auto& child_node = m_nodes[child_id];
+        if (child_node.get_key_name() == field_name) {
+            return child_id;
+        }
+    }
+
+    return -1;
+}
+
 size_t SchemaTree::store(std::string const& archives_dir, int compression_level) {
     FileWriter schema_tree_writer;
     ZstdCompressor schema_tree_compressor;
@@ -41,9 +64,9 @@ size_t SchemaTree::store(std::string const& archives_dir, int compression_level)
     for (auto const& node : m_nodes) {
         schema_tree_compressor.write_numeric_value(node.get_parent_id());
 
-        std::string const& key = node.get_key_name();
+        auto key = node.get_key_name();
         schema_tree_compressor.write_numeric_value(key.size());
-        schema_tree_compressor.write_string(key);
+        schema_tree_compressor.write(key.begin(), key.size());
         schema_tree_compressor.write_numeric_value(node.get_type());
     }
 
diff --git a/components/core/src/clp_s/SchemaTree.hpp b/components/core/src/clp_s/SchemaTree.hpp
index 05ed52935..438714fa2 100644
--- a/components/core/src/clp_s/SchemaTree.hpp
+++ b/components/core/src/clp_s/SchemaTree.hpp
@@ -5,11 +5,25 @@
 #include <memory>
 #include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 
 #include <absl/container/flat_hash_map.h>
 
 namespace clp_s {
+/**
+ * This enum defines the valid MPT node types as well as the 8-bit number used to encode them.
+ *
+ * The number used to represent each node type can not change. That means that elements in this
+ * enum can never be reordered and that new node types always need to be added to the end of the
+ * enum (but before Unknown).
+ *
+ * Node types are used to help record the structure of a log record, with the exception of the
+ * "Metadata" node type. The "Metadata" type is a special type used by the implementation to
+ * demarcate data needed by the implementation that is not part of the log record. In particular,
+ * the implementation may create a special subtree of the MPT which contains fields used to record
+ * things like original log order.
+ */
 enum class NodeType : uint8_t {
     Integer,
     Float,
@@ -21,21 +35,37 @@ enum class NodeType : uint8_t {
     NullValue,
     DateString,
     StructuredArray,
+    Metadata,
     Unknown = std::underlying_type<NodeType>::type(~0ULL)
 };
 
+/**
+ * This class represents a single node in the SchemaTree.
+ *
+ * Note: the result of get_key_name is valid even if the original SchemaNode is later
+ * move-constructed.
+ */
 class SchemaNode {
 public:
     // Constructor
     SchemaNode() : m_parent_id(-1), m_id(-1), m_type(NodeType::Integer), m_count(0) {}
 
-    SchemaNode(int32_t parent_id, int32_t id, std::string key_name, NodeType type, int32_t depth)
+    SchemaNode(
+            int32_t parent_id,
+            int32_t id,
+            std::string_view const key_name,
+            NodeType type,
+            int32_t depth
+    )
             : m_parent_id(parent_id),
               m_id(id),
-              m_key_name(std::move(key_name)),
+              m_key_name_buf(std::make_unique<char[]>(key_name.size())),
+              m_key_name(m_key_name_buf.get(), key_name.size()),
               m_type(type),
               m_count(0),
-              m_depth(depth) {}
+              m_depth(depth) {
+        memcpy(m_key_name_buf.get(), key_name.begin(), key_name.size());
+    }
 
     /**
      * Getters
@@ -48,7 +78,7 @@ class SchemaNode {
 
     NodeType get_type() const { return m_type; }
 
-    std::string const& get_key_name() const { return m_key_name; }
+    std::string_view const get_key_name() const { return m_key_name; }
 
     int32_t get_count() const { return m_count; }
 
@@ -71,7 +101,10 @@ class SchemaNode {
     int32_t m_id;
     int32_t m_parent_id;
     std::vector<int32_t> m_children_ids;
-    std::string m_key_name;
+    // We use a buffer so that references to this key name are stable after this SchemaNode is move
+    // constructed
+    std::unique_ptr<char[]> m_key_name_buf;
+    std::string_view m_key_name;
     NodeType m_type;
     int32_t m_count;
     int32_t m_depth{0};
@@ -81,7 +114,7 @@ class SchemaTree {
 public:
     SchemaTree() = default;
 
-    int32_t add_node(int parent_node_id, NodeType type, std::string const& key);
+    int32_t add_node(int parent_node_id, NodeType type, std::string_view const key);
 
     bool has_node(int32_t id) { return id < m_nodes.size() && id >= 0; }
 
@@ -93,7 +126,25 @@ class SchemaTree {
         return m_nodes[id];
     }
 
-    int32_t get_root_node_id() const { return m_nodes[0].get_id(); }
+    /**
+     * @return the Id of the root of the Object sub-tree that records the structure of JSON data.
+     * @return -1 if the Object sub-tree does not exist.
+     */
+    int32_t get_object_subtree_node_id() const { return m_object_subtree_id; }
+
+    /**
+     * Get the field Id for a specified field within the Metadata subtree.
+     * @param field_name
+     *
+     * @return the field Id if the field exists within the Metadata sub-tree, -1 otherwise.
+     */
+    int32_t get_metadata_field_id(std::string_view const field_name);
+
+    /**
+     * @return the Id of the root of the Metadata sub-tree.
+     * @return -1 if the Metadata sub-tree does not exist.
+     */
+    int32_t get_metadata_subtree_node_id() { return m_metadata_subtree_id; }
 
     std::vector<SchemaNode> const& get_nodes() const { return m_nodes; }
 
@@ -129,7 +180,9 @@ class SchemaTree {
 
 private:
     std::vector<SchemaNode> m_nodes;
-    absl::flat_hash_map<std::tuple<int32_t, std::string, NodeType>, int32_t> m_node_map;
+    absl::flat_hash_map<std::tuple<int32_t, std::string_view const, NodeType>, int32_t> m_node_map;
+    int32_t m_object_subtree_id{-1};
+    int32_t m_metadata_subtree_id{-1};
 };
 }  // namespace clp_s
 
diff --git a/components/core/src/clp_s/archive_constants.hpp b/components/core/src/clp_s/archive_constants.hpp
index 30e2b78d5..604c97f66 100644
--- a/components/core/src/clp_s/archive_constants.hpp
+++ b/components/core/src/clp_s/archive_constants.hpp
@@ -1,6 +1,8 @@
 #ifndef CLP_S_ARCHIVE_CONSTANTS_HPP
 #define CLP_S_ARCHIVE_CONSTANTS_HPP
 
+#include <cstdint>
+
 namespace clp_s::constants {
 // Schema files
 constexpr char cArchiveSchemaMapFile[] = "/schema_ids";
@@ -16,6 +18,12 @@ constexpr char cArchiveLogDictFile[] = "/log.dict";
 constexpr char cArchiveTimestampDictFile[] = "/timestamp.dict";
 constexpr char cArchiveVarDictFile[] = "/var.dict";
 
+// Schema tree constants
+constexpr char cRootNodeName[] = "";
+constexpr int32_t cRootNodeId = -1;
+constexpr char cMetadataSubtreeName[] = "";
+constexpr char cLogEventIdxName[] = "log_event_idx";
+
 namespace results_cache::decompression {
 constexpr char cPath[]{"path"};
 constexpr char cOrigFileId[]{"orig_file_id"};
@@ -24,5 +32,13 @@ constexpr char cEndMsgIx[]{"end_msg_ix"};
 constexpr char cIsLastIrChunk[]{"is_last_ir_chunk"};
 }  // namespace results_cache::decompression
 
+namespace results_cache::search {
+constexpr char cOrigFilePath[]{"orig_file_path"};
+constexpr char cLogEventIx[]{"log_event_ix"};
+constexpr char cTimestamp[]{"timestamp"};
+constexpr char cMessage[]{"message"};
+constexpr char cArchiveId[]{"archive_id"};
+}  // namespace results_cache::search
+
 }  // namespace clp_s::constants
 #endif  // CLP_S_ARCHIVE_CONSTANTS_HPP
diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp
index 8752384ae..941fd4366 100644
--- a/components/core/src/clp_s/clp-s.cpp
+++ b/components/core/src/clp_s/clp-s.cpp
@@ -96,6 +96,7 @@ bool compress(CommandLineArguments const& command_line_arguments) {
     option.timestamp_key = command_line_arguments.get_timestamp_key();
     option.print_archive_stats = command_line_arguments.print_archive_stats();
     option.structurize_arrays = command_line_arguments.get_structurize_arrays();
+    option.record_log_order = command_line_arguments.get_record_log_order();
 
     auto const& db_config_container = command_line_arguments.get_metadata_db_config();
     if (db_config_container.has_value()) {
diff --git a/components/core/src/clp_s/search/ColumnDescriptor.hpp b/components/core/src/clp_s/search/ColumnDescriptor.hpp
index 7341ff07a..ea1cfd7ec 100644
--- a/components/core/src/clp_s/search/ColumnDescriptor.hpp
+++ b/components/core/src/clp_s/search/ColumnDescriptor.hpp
@@ -4,6 +4,7 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <string_view>
 #include <vector>
 
 #include "Literal.hpp"
@@ -22,7 +23,7 @@ class DescriptorToken {
      * wildcards
      * @param token the string to initialize the token from
      */
-    explicit DescriptorToken(std::string const& token)
+    explicit DescriptorToken(std::string_view const token)
             : m_token(token),
               m_wildcard(false),
               m_regex(false) {
diff --git a/components/core/src/clp_s/search/Output.cpp b/components/core/src/clp_s/search/Output.cpp
index 4d36b4e29..c9954779b 100644
--- a/components/core/src/clp_s/search/Output.cpp
+++ b/components/core/src/clp_s/search/Output.cpp
@@ -62,6 +62,7 @@ bool Output::filter() {
         }
     }
 
+    populate_internal_columns();
     populate_string_queries(top_level_expr);
 
     std::string message;
@@ -92,9 +93,10 @@ bool Output::filter() {
         reader.initialize_filter(this);
 
         if (m_output_handler->should_output_metadata()) {
-            epochtime_t timestamp;
-            while (reader.get_next_message_with_timestamp(message, timestamp, this)) {
-                m_output_handler->write(message, timestamp, archive_id);
+            epochtime_t timestamp{};
+            int64_t log_event_idx{};
+            while (reader.get_next_message_with_metadata(message, timestamp, log_event_idx, this)) {
+                m_output_handler->write(message, timestamp, archive_id, log_event_idx);
             }
         } else {
             while (reader.get_next_message(message, this)) {
@@ -136,6 +138,10 @@ void Output::init(
 
     for (auto column_reader : column_readers) {
         auto column_id = column_reader->get_id();
+        if (0 != m_metadata_columns.count(column_id)) {
+            continue;
+        }
+
         if ((0
              != (m_wildcard_type_mask
                  & node_to_literal_type(m_schema_tree->get_node(column_id).get_type())))
@@ -959,6 +965,19 @@ void Output::populate_string_queries(std::shared_ptr<Expression> const& expr) {
     }
 }
 
+void Output::populate_internal_columns() {
+    int32_t metadata_subtree_root_node_id = m_schema_tree->get_metadata_subtree_node_id();
+    if (-1 == metadata_subtree_root_node_id) {
+        return;
+    }
+
+    // This code assumes that the metadata subtree contains no nested structures
+    auto& metadata_node = m_schema_tree->get_node(metadata_subtree_root_node_id);
+    for (auto child_id : metadata_node.get_children_ids()) {
+        m_metadata_columns.insert(child_id);
+    }
+}
+
 void Output::populate_searched_wildcard_columns(std::shared_ptr<Expression> const& expr) {
     if (expr->has_only_expression_operands()) {
         for (auto const& op : expr->get_op_list()) {
@@ -975,6 +994,9 @@ void Output::populate_searched_wildcard_columns(std::shared_ptr<Expression> cons
             if (Schema::schema_entry_is_unordered_object(node)) {
                 continue;
             }
+            if (0 != m_metadata_columns.count(node)) {
+                continue;
+            }
             auto tree_node_type = m_schema_tree->get_node(node).get_type();
             if (col->matches_type(node_to_literal_type(tree_node_type))) {
                 auto literal_type = node_to_literal_type(tree_node_type);
diff --git a/components/core/src/clp_s/search/Output.hpp b/components/core/src/clp_s/search/Output.hpp
index ba4c4e1d7..bfd364457 100644
--- a/components/core/src/clp_s/search/Output.hpp
+++ b/components/core/src/clp_s/search/Output.hpp
@@ -5,6 +5,7 @@
 #include <set>
 #include <stack>
 #include <string>
+#include <unordered_map>
 #include <unordered_set>
 #include <utility>
 
@@ -88,6 +89,7 @@ class Output : public FilterClass {
     std::vector<ColumnDescriptor*> m_wildcard_columns;
     std::map<ColumnDescriptor*, std::set<int32_t>> m_wildcard_to_searched_basic_columns;
     LiteralTypeBitmask m_wildcard_type_mask{0};
+    std::unordered_set<int32_t> m_metadata_columns;
 
     std::stack<
             std::pair<ExpressionType, OpList::iterator>,
@@ -342,6 +344,11 @@ class Output : public FilterClass {
      */
     void populate_string_queries(std::shared_ptr<Expression> const& expr);
 
+    /**
+     * Populates the set of internal columns that get ignored during dynamic wildcard expansion.
+     */
+    void populate_internal_columns();
+
     /**
      * Constant propagates an expression
      * @param expr
diff --git a/components/core/src/clp_s/search/OutputHandler.cpp b/components/core/src/clp_s/search/OutputHandler.cpp
index 8d6a1eaa5..13771e9b5 100644
--- a/components/core/src/clp_s/search/OutputHandler.cpp
+++ b/components/core/src/clp_s/search/OutputHandler.cpp
@@ -10,6 +10,7 @@
 #include "../../reducer/CountOperator.hpp"
 #include "../../reducer/network_utils.hpp"
 #include "../../reducer/Record.hpp"
+#include "../archive_constants.hpp"
 
 using std::string;
 using std::string_view;
@@ -31,11 +32,12 @@ NetworkOutputHandler::NetworkOutputHandler(
 void NetworkOutputHandler::write(
         string_view message,
         epochtime_t timestamp,
-        string_view archive_id
+        string_view archive_id,
+        int64_t log_event_idx
 ) {
     static constexpr string_view cOrigFilePathPlaceholder{""};
-    msgpack::type::tuple<epochtime_t, string, string, string> const
-            src(timestamp, message, cOrigFilePathPlaceholder, archive_id);
+    msgpack::type::tuple<epochtime_t, string, string, string, int64_t> const
+            src(timestamp, message, cOrigFilePathPlaceholder, archive_id, log_event_idx);
     msgpack::sbuffer m;
     msgpack::pack(m, src);
 
@@ -72,10 +74,26 @@ ErrorCode ResultsCacheOutputHandler::flush() {
 
         try {
             m_results.emplace_back(std::move(bsoncxx::builder::basic::make_document(
-                    bsoncxx::builder::basic::kvp("original_path", std::move(result.original_path)),
-                    bsoncxx::builder::basic::kvp("message", std::move(result.message)),
-                    bsoncxx::builder::basic::kvp("timestamp", result.timestamp),
-                    bsoncxx::builder::basic::kvp("archive_id", std::move(result.archive_id))
+                    bsoncxx::builder::basic::kvp(
+                            constants::results_cache::search::cOrigFilePath,
+                            std::move(result.original_path)
+                    ),
+                    bsoncxx::builder::basic::kvp(
+                            constants::results_cache::search::cMessage,
+                            std::move(result.message)
+                    ),
+                    bsoncxx::builder::basic::kvp(
+                            constants::results_cache::search::cTimestamp,
+                            result.timestamp
+                    ),
+                    bsoncxx::builder::basic::kvp(
+                            constants::results_cache::search::cArchiveId,
+                            std::move(result.archive_id)
+                    ),
+                    bsoncxx::builder::basic::kvp(
+                            constants::results_cache::search::cLogEventIx,
+                            result.log_event_idx
+                    )
             )));
             count++;
 
@@ -103,17 +121,26 @@ ErrorCode ResultsCacheOutputHandler::flush() {
 void ResultsCacheOutputHandler::write(
         string_view message,
         epochtime_t timestamp,
-        string_view archive_id
+        string_view archive_id,
+        int64_t log_event_idx
 ) {
     if (m_latest_results.size() < m_max_num_results) {
-        m_latest_results.emplace(
-                std::make_unique<QueryResult>(string_view{}, message, timestamp, archive_id)
-        );
+        m_latest_results.emplace(std::make_unique<QueryResult>(
+                string_view{},
+                message,
+                timestamp,
+                archive_id,
+                log_event_idx
+        ));
     } else if (m_latest_results.top()->timestamp < timestamp) {
         m_latest_results.pop();
-        m_latest_results.emplace(
-                std::make_unique<QueryResult>(string_view{}, message, timestamp, archive_id)
-        );
+        m_latest_results.emplace(std::make_unique<QueryResult>(
+                string_view{},
+                message,
+                timestamp,
+                archive_id,
+                log_event_idx
+        ));
     }
 }
 
diff --git a/components/core/src/clp_s/search/OutputHandler.hpp b/components/core/src/clp_s/search/OutputHandler.hpp
index ca12e32d3..f033d37e8 100644
--- a/components/core/src/clp_s/search/OutputHandler.hpp
+++ b/components/core/src/clp_s/search/OutputHandler.hpp
@@ -43,9 +43,14 @@ class OutputHandler {
      * @param message The message in the log event.
      * @param timestamp The timestamp of the log event.
      * @param archive_id The archive containing the log event.
+     * @param log_event_idx The index of the log event within an archive.
      */
-    virtual void write(std::string_view message, epochtime_t timestamp, std::string_view archive_id)
-            = 0;
+    virtual void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) = 0;
 
     /**
      * Writes a message to the output handler.
@@ -84,9 +89,13 @@ class StandardOutputHandler : public OutputHandler {
             : OutputHandler(should_output_metadata, true) {}
 
     // Methods inherited from OutputHandler
-    void
-    write(std::string_view message, epochtime_t timestamp, std::string_view archive_id) override {
-        std::cout << archive_id << ": " << timestamp << " " << message;
+    void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) override {
+        std::cout << archive_id << ": " << log_event_idx << ": " << timestamp << " " << message;
     }
 
     void write(std::string_view message) override { std::cout << message; }
@@ -120,10 +129,14 @@ class NetworkOutputHandler : public OutputHandler {
     }
 
     // Methods inherited from OutputHandler
-    void
-    write(std::string_view message, epochtime_t timestamp, std::string_view archive_id) override;
+    void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) override;
 
-    void write(std::string_view message) override { write(message, 0, {}); }
+    void write(std::string_view message) override { write(message, 0, {}, 0); }
 
 private:
     std::string m_host;
@@ -143,17 +156,20 @@ class ResultsCacheOutputHandler : public OutputHandler {
                 std::string_view original_path,
                 std::string_view message,
                 epochtime_t timestamp,
-                std::string_view archive_id
+                std::string_view archive_id,
+                int64_t log_event_idx
         )
                 : original_path(original_path),
                   message(message),
                   timestamp(timestamp),
-                  archive_id(archive_id) {}
+                  archive_id(archive_id),
+                  log_event_idx(log_event_idx) {}
 
         std::string original_path;
         std::string message;
         epochtime_t timestamp;
         std::string archive_id;
+        int64_t log_event_idx;
     };
 
     struct QueryResultGreaterTimestampComparator {
@@ -189,10 +205,14 @@ class ResultsCacheOutputHandler : public OutputHandler {
      */
     ErrorCode flush() override;
 
-    void
-    write(std::string_view message, epochtime_t timestamp, std::string_view archive_id) override;
+    void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) override;
 
-    void write(std::string_view message) override { write(message, 0, {}); }
+    void write(std::string_view message) override { write(message, 0, {}, 0); }
 
 private:
     mongocxx::client m_client;
@@ -216,8 +236,12 @@ class CountOutputHandler : public OutputHandler {
     CountOutputHandler(int reducer_socket_fd);
 
     // Methods inherited from OutputHandler
-    void
-    write(std::string_view message, epochtime_t timestamp, std::string_view archive_id) override {}
+    void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) override {}
 
     void write(std::string_view message) override;
 
@@ -246,8 +270,12 @@ class CountByTimeOutputHandler : public OutputHandler {
               m_count_by_time_bucket_size{count_by_time_bucket_size} {}
 
     // Methods inherited from OutputHandler
-    void
-    write(std::string_view message, epochtime_t timestamp, std::string_view archive_id) override {
+    void write(
+            std::string_view message,
+            epochtime_t timestamp,
+            std::string_view archive_id,
+            int64_t log_event_idx
+    ) override {
         int64_t bucket = (timestamp / m_count_by_time_bucket_size) * m_count_by_time_bucket_size;
         m_bucket_counts[bucket] += 1;
     }
diff --git a/components/core/src/clp_s/search/Projection.cpp b/components/core/src/clp_s/search/Projection.cpp
index 69836e312..b1c453776 100644
--- a/components/core/src/clp_s/search/Projection.cpp
+++ b/components/core/src/clp_s/search/Projection.cpp
@@ -52,7 +52,7 @@ void Projection::resolve_column(
      * what we need.
      */
 
-    auto cur_node_id = tree->get_root_node_id();
+    auto cur_node_id = tree->get_object_subtree_node_id();
     auto it = column->descriptor_begin();
     while (it != column->descriptor_end()) {
         bool matched_any{false};
diff --git a/components/core/src/clp_s/search/SchemaMatch.cpp b/components/core/src/clp_s/search/SchemaMatch.cpp
index 82c9af866..203634000 100644
--- a/components/core/src/clp_s/search/SchemaMatch.cpp
+++ b/components/core/src/clp_s/search/SchemaMatch.cpp
@@ -76,7 +76,7 @@ std::shared_ptr<Expression> SchemaMatch::populate_column_mapping(std::shared_ptr
                     auto const* node = &m_tree->get_node(node_id);
                     auto literal_type = node_to_literal_type(node->get_type());
                     DescriptorList descriptors;
-                    while (node->get_id() != m_tree->get_root_node_id()) {
+                    while (node->get_id() != m_tree->get_object_subtree_node_id()) {
                         // may have to explicitly mark non-regex
                         descriptors.emplace_back(node->get_key_name());
                         node = &m_tree->get_node(node->get_parent_id());
@@ -111,9 +111,9 @@ bool SchemaMatch::populate_column_mapping(ColumnDescriptor* column) {
         return matched;
     }
 
-    // TODO: once we start supporting multi-rooted MPTs this (and anything that uses
-    // get_root_node_id, or assumes root node id is 0) will have to change
-    auto const& root = m_tree->get_node(m_tree->get_root_node_id());
+    // TODO: Once we start supporting mixing different types of logs we will have to match against
+    // more than just the object subtree.
+    auto const& root = m_tree->get_node(m_tree->get_object_subtree_node_id());
     for (int32_t child_node_id : root.get_children_ids()) {
         matched |= populate_column_mapping(column, child_node_id);
     }
diff --git a/components/core/src/clp_s/search/SearchUtils.cpp b/components/core/src/clp_s/search/SearchUtils.cpp
index 3f7c522cb..39ae694a0 100644
--- a/components/core/src/clp_s/search/SearchUtils.cpp
+++ b/components/core/src/clp_s/search/SearchUtils.cpp
@@ -34,6 +34,7 @@ LiteralType node_to_literal_type(NodeType type) {
             return LiteralType::NullT;
         case NodeType::DateString:
             return LiteralType::EpochDateT;
+        case NodeType::Metadata:
         case NodeType::Unknown:
         default:
             return LiteralType::UnknownT;