From ab618851c24c6e82333168de09419f103f0fd8ee Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Mon, 10 Jun 2024 16:01:21 +0200
Subject: [PATCH 01/38] bump delta kernel

---
 CMakeLists.txt                 | 2 +-
 src/delta_utils.cpp            | 4 ++--
 test/sql/dat/basic_append.test | 7 -------
 3 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8d12ec..a37370c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,7 +60,7 @@ endif()
 ExternalProject_Add(
         ${KERNEL_NAME}
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
-        GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8
+        GIT_TAG 823367e4dc13b627914412ee2ca7933a1c7b822a
         CONFIGURE_COMMAND ""
         UPDATE_COMMAND ""
         BUILD_IN_SOURCE 1
diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp
index b02e898..7f1d3a7 100644
--- a/src/delta_utils.cpp
+++ b/src/delta_utils.cpp
@@ -226,7 +226,7 @@ static unordered_map<string, TableFilter*> PrunePredicates(unordered_map<string,
 }
 
 uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor* predicate, ffi::KernelExpressionVisitorState* state) {
-    auto filters = PrunePredicates(predicate->column_filters);
+    auto filters = predicate->column_filters;
 
     auto it = filters.begin();
     auto end = filters.end();
@@ -312,7 +312,7 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte
         case TableFilterType::CONJUNCTION_AND:
             return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter&>(filter), state);
         default:
-            throw NotImplementedException("Attempted to push down unimplemented filter type: '%s'", EnumUtil::ToString(filter.filter_type));
+            return ~0;
     }
 }
 
diff --git a/test/sql/dat/basic_append.test b/test/sql/dat/basic_append.test
index 4ff31bc..87930b8 100644
--- a/test/sql/dat/basic_append.test
+++ b/test/sql/dat/basic_append.test
@@ -56,9 +56,6 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
 2
 3
 
-# TODO: Figure out what's wrong here
-mode skip
-
 # Now we add a filter that filters out one of the files
 query II
 SELECT letter, number
@@ -67,8 +64,6 @@ WHERE number < 2
 ----
 a	1
 
-mode unskip
-
 # Now we add a filter that filters out the other file
 query III
 SELECT a_float, letter, number, 
@@ -77,8 +72,6 @@ WHERE number > 4
 ----
 5.5	e	5
 
-mode skip
-
 # Now we add a filter that filters out all columns
 query III
 SELECT a_float, number, letter

From a6f85ef00572de92836b7b9ece90da8affaf25b1 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Mon, 10 Jun 2024 16:02:23 +0200
Subject: [PATCH 02/38] set correct duckdb version for submodule

---
 duckdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/duckdb b/duckdb
index 7b8efd3..1f98600 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 7b8efd3d0fab38ec9dae467861a317af3f1d7f3e
+Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc

From ef1dd70dee920175a559f69fc3ffa388b778c9ea Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 13 Jun 2024 11:33:00 +0200
Subject: [PATCH 03/38] bump delta to c901665b98b

---
 CMakeLists.txt                |  2 +-
 Makefile                      |  5 -----
 extension-ci-tools            |  2 +-
 scripts/generate_test_data.py |  8 +++++++-
 src/delta_utils.cpp           | 36 ++++++++++++++++++++++++++++++++---
 src/functions/delta_scan.cpp  |  2 ++
 src/include/delta_utils.hpp   |  5 +++++
 7 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a37370c..28ea1d2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,7 +60,7 @@ endif()
 ExternalProject_Add(
         ${KERNEL_NAME}
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
-        GIT_TAG 823367e4dc13b627914412ee2ca7933a1c7b822a
+        GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281
         CONFIGURE_COMMAND ""
         UPDATE_COMMAND ""
         BUILD_IN_SOURCE 1
diff --git a/Makefile b/Makefile
index 05db957..78144e6 100644
--- a/Makefile
+++ b/Makefile
@@ -14,11 +14,6 @@ test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests
 # Include the Makefile from extension-ci-tools
 include extension-ci-tools/makefiles/duckdb_extension.Makefile
 
-reldebug:
-	mkdir -p build/reldebug && \
-	cmake $(GENERATOR) $(BUILD_FLAGS) $(EXT_RELEASE_FLAGS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -S ./duckdb/ -B build/reldebug && \
-	cmake --build build/reldebug --config RelWithDebInfo
-
 # Generate some test data to test with
 generate-data:
 	python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta
diff --git a/extension-ci-tools b/extension-ci-tools
index 71b8a60..c0cc931 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f
+Subproject commit c0cc9319492bfa38344c2f28bd35f2304c74cdde
diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index 715e882..cb1d2f7 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -136,8 +136,14 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
 for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]:
     generate_test_data_pyspark(f"tpch_sf0_01_{table}", f'tpch_sf0_01/{table}', f'{TMP_PATH}/tpch_sf0_01_export/{table}.parquet')
 
+## TPCH SF1 full dataset
+con = duckdb.connect()
+con.query(f"call dbgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpch_sf1_export' (FORMAT parquet)")
+for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]:
+    generate_test_data_pyspark(f"tpch_sf1_{table}", f'tpch_sf1/{table}', f'{TMP_PATH}/tpch_sf1_export/{table}.parquet')
+
 ## TPCDS SF0.01 full dataset
 con = duckdb.connect()
 con.query(f"call dsdgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpcds_sf0_01_export' (FORMAT parquet)")
 for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]:
-    generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet')
+    generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet')
\ No newline at end of file
diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp
index 7f1d3a7..a805d15 100644
--- a/src/delta_utils.cpp
+++ b/src/delta_utils.cpp
@@ -3,6 +3,7 @@
 #include "duckdb.hpp"
 #include "duckdb/main/extension_util.hpp"
 #include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
+#include <duckdb/planner/filter/null_filter.hpp>
 
 namespace duckdb {
 
@@ -257,8 +258,24 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co
         case LogicalType::BIGINT:
             right = visit_expression_literal_long(state, BigIntValue::Get(value));
             break;
-
-
+        // case LogicalType::INTEGER:
+        //     right = visit_expression_literal_int(state, IntegerValue::Get(value));
+        //     break;
+        // case LogicalType::SMALLINT:
+        //     right = visit_expression_literal_short(state, SmallIntValue::Get(value));
+        //     break;
+        // case LogicalType::TINYINT:
+        //     right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
+        //     break;
+        // case LogicalType::FLOAT:
+        //     right = visit_expression_literal_float(state, FloatValue::Get(value));
+        //     break;
+        // case LogicalType::DOUBLE:
+        //     right = visit_expression_literal_double(state, DoubleValue::Get(value));
+        //     break;
+        //  case LogicalType::BOOLEAN:
+        //     right = visit_expression_literal_bool(state, BooleanValue::Get(value));
+        //     break;
         case LogicalType::VARCHAR: {
             // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str())
             auto str = StringValue::Get(value);
@@ -266,7 +283,6 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co
             right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
             break;
         }
-
         default:
             break; // unsupported type
     }
@@ -305,12 +321,26 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc
     return visit_expression_and(state, &eit);
 }
 
+uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
+    auto maybe_left = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
+    uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitIsNull failed to visit_expression_column");
+    return ffi::visit_expression_is_null(state, left);
+}
+
+uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
+    return ffi::visit_expression_not(state, VisitIsNull(col_name, state));
+}
+
 uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state) {
     switch (filter.filter_type) {
         case TableFilterType::CONSTANT_COMPARISON:
             return VisitConstantFilter(col_name, static_cast<const ConstantFilter&>(filter), state);
         case TableFilterType::CONJUNCTION_AND:
             return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter&>(filter), state);
+        // case TableFilterType::IS_NULL:
+        //     return VisitIsNull(col_name, state);
+        // case TableFilterType::IS_NOT_NULL:
+        //     return VisitIsNotNull(col_name, state);
         default:
             return ~0;
     }
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index dd2a027..d4320e5 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -31,6 +31,8 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
     StringUtil::RTrim(path_string, "/");
     path_string += "/" + KernelUtils::FromDeltaString(path);
 
+    printf("Got File %s\n", path_string.c_str());
+
     // First we append the file to our resolved files
     context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
     context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp
index bcb5f74..37dc289 100644
--- a/src/include/delta_utils.hpp
+++ b/src/include/delta_utils.hpp
@@ -5,6 +5,7 @@
 #include "duckdb/planner/filter/conjunction_filter.hpp"
 #include "duckdb/common/enum_util.hpp"
 #include <iostream>
+#include <duckdb/planner/filter/null_filter.hpp>
 
 // TODO: clean up this file as we go
 
@@ -140,6 +141,10 @@ class PredicateVisitor : public ffi::EnginePredicate {
 
     uintptr_t VisitConstantFilter(const string &col_name, const ConstantFilter &filter, ffi::KernelExpressionVisitorState* state);
     uintptr_t VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter, ffi::KernelExpressionVisitorState* state);
+
+    uintptr_t VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState* state);
+    uintptr_t VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState* state);
+
     uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state);
 };
 

From d6ec36945701e293b480d901255673ae451e1f89 Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Thu, 13 Jun 2024 20:28:23 +0200
Subject: [PATCH 04/38] azure support

---
 src/functions/delta_scan.cpp | 119 +++++++++++++++++++++++++++--------
 1 file changed, 94 insertions(+), 25 deletions(-)

diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index dd2a027..41c38cf 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -69,27 +69,43 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
     ffi::EngineBuilder* builder;
 
     // For "regular" paths we early out with the default builder config
-    if (!StringUtil::StartsWith(path, "s3://")) {
+    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://")) {
         auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
         return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
     }
 
-    auto end_of_container = path.find('/',5);
+    string bucket;
+    string path_in_bucket;
+    string secret_type;
 
-    if(end_of_container == string::npos) {
-        throw IOException("Invalid s3 url passed to delta scan: %s", path);
+    if (StringUtil::StartsWith(path, "s3://")) {
+        auto end_of_container = path.find('/',5);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid s3 url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(5, end_of_container-5);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "s3";
+    } else if (StringUtil::StartsWith(path, "azure://")) {
+        auto end_of_container = path.find('/',8);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid azure url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(8, end_of_container-8);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "azure";
     }
-    auto bucket = path.substr(5, end_of_container-5);
-    auto path_in_bucket = path.substr(end_of_container);
 
     auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
     builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
 
-    // For S3 paths we need to trim the url, set the container, and fetch a potential secret
+    // For S3 or Azure paths we need to trim the url, set the container, and fetch a potential secret
     auto &secret_manager = SecretManager::Get(context);
     auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context);
 
-    auto secret_match = secret_manager.LookupSecret(transaction, path, "s3");
+    auto secret_match = secret_manager.LookupSecret(transaction, path, secret_type);
 
     // No secret: nothing left to do here!
     if (!secret_match.HasMatch()) {
@@ -97,26 +113,79 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
     }
     const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_match.secret_entry->secret);
 
-    auto key_id = kv_secret.TryGetValue("key_id").ToString();
-    auto secret = kv_secret.TryGetValue("secret").ToString();
-    auto session_token = kv_secret.TryGetValue("session_token").ToString();
-    auto region = kv_secret.TryGetValue("region").ToString();
 
-    if (key_id.empty() && secret.empty()) {
-        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true"));
-    }
+    // Here you would need to add the logic for setting the builder options for Azure
+    // This is just a placeholder and will need to be replaced with the actual logic
+    if (secret_type == "s3") {
+        auto key_id = kv_secret.TryGetValue("key_id").ToString();
+        auto secret = kv_secret.TryGetValue("secret").ToString();
+        auto session_token = kv_secret.TryGetValue("session_token").ToString();
+        auto region = kv_secret.TryGetValue("region").ToString();
 
-    if (!key_id.empty()) {
-        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id));
-    }
-    if (!secret.empty()) {
-        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret));
-    }
-    if (!session_token.empty()) {
-        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token));
-    }
-    ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));
+        if (key_id.empty() && secret.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true"));
+        }
+
+        if (!key_id.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id));
+        }
+        if (!secret.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret));
+        }
+        if (!session_token.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token));
+        }
+        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));
+
+    } else if (secret_type == "azure") {
+        
+        auto connection_string = kv_secret.TryGetValue("connection_string").ToString();
+        auto account_name = kv_secret.TryGetValue("account_name").ToString();
+        auto account_key = kv_secret.TryGetValue("account_key").ToString();
+        auto client_id = kv_secret.TryGetValue("client_id").ToString();
+        auto client_secret = kv_secret.TryGetValue("client_secret").ToString();
+        auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString();
+        auto azure_client_certificate_path = kv_secret.TryGetValue("certificate_path").ToString();
+        auto sas_token = kv_secret.TryGetValue("sas_token").ToString();
+        auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString();
+        auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString();
+        auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString();
+
+        if (!connection_string.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string));
+        }
+        if (!account_name.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name));
+        }
+        if (!account_key.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_key"), KernelUtils::ToDeltaString(account_key));
+        }
+        if (!client_id.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id));
+        }
+        if (!client_secret.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret));
+        }
+        if (!tenant_id.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id));
+        }
+        if (!azure_client_certificate_path.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(azure_client_certificate_path));
+        }
+        if (!sas_token.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_sas_token"), KernelUtils::ToDeltaString(sas_token));
+        }
+        if (!http_proxy.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy));
+        }
+        if (!proxy_user_name.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name));
+        }
+        if (!proxy_password.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password));
+        }
 
+    }
     return builder;
 }
 

From a2ddb6c7d65cbae8e7466a4d35c315b2c34b8799 Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Thu, 13 Jun 2024 22:02:59 +0200
Subject: [PATCH 05/38] azure test impl

---
 src/functions/delta_scan.cpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 41c38cf..1b7d894 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -69,7 +69,7 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
     ffi::EngineBuilder* builder;
 
     // For "regular" paths we early out with the default builder config
-    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://")) {
+    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfss://")) {
         auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
         return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
     }
@@ -90,6 +90,24 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
     } else if (StringUtil::StartsWith(path, "azure://")) {
         auto end_of_container = path.find('/',8);
 
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid azure url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(8, end_of_container-8);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "azure";
+    } else if (StringUtil::StartsWith(path, "az://")) {
+        auto end_of_container = path.find('/',5);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid azure url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(5, end_of_container-5);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "azure";
+    } else if (StringUtil::StartsWith(path, "abfss://")) {
+        auto end_of_container = path.find('/',8);
+
         if(end_of_container == string::npos) {
             throw IOException("Invalid azure url passed to delta scan: %s", path);
         }

From efd4db01aeb81dec4900d9957a67360eb83fbd18 Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Fri, 14 Jun 2024 11:11:15 +0200
Subject: [PATCH 06/38] update azure values for azure extension

---
 src/functions/delta_scan.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 1b7d894..05b958e 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -159,12 +159,12 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         
         auto connection_string = kv_secret.TryGetValue("connection_string").ToString();
         auto account_name = kv_secret.TryGetValue("account_name").ToString();
-        auto account_key = kv_secret.TryGetValue("account_key").ToString();
+        auto endpoint = kv_secret.TryGetValue("endpoint").ToString();
+        auto credential_chain = kv_secret.TryGetValue("credential_chain").ToString();
         auto client_id = kv_secret.TryGetValue("client_id").ToString();
         auto client_secret = kv_secret.TryGetValue("client_secret").ToString();
         auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString();
-        auto azure_client_certificate_path = kv_secret.TryGetValue("certificate_path").ToString();
-        auto sas_token = kv_secret.TryGetValue("sas_token").ToString();
+        auto certificate_path = kv_secret.TryGetValue("certificate_path").ToString();
         auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString();
         auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString();
         auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString();
@@ -175,8 +175,11 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         if (!account_name.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name));
         }
-        if (!account_key.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_key"), KernelUtils::ToDeltaString(account_key));
+        if (!endpoint.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint));
+        }
+        if (!credential_chain.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(credential_chain));
         }
         if (!client_id.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id));
@@ -187,11 +190,8 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         if (!tenant_id.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id));
         }
-        if (!azure_client_certificate_path.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(azure_client_certificate_path));
-        }
-        if (!sas_token.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_sas_token"), KernelUtils::ToDeltaString(sas_token));
+        if (!certificate_path.empty()) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path));
         }
         if (!http_proxy.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy));

From 1563715a79c5b17db469ca233f28a30cd080ef4f Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Sat, 15 Jun 2024 10:54:26 +0200
Subject: [PATCH 07/38] bump delta to 181232a45562, enable cardinalty
 estimation, fix varchar pushdown

---
 CMakeLists.txt                |  4 +--
 scripts/generate_test_data.py |  3 +-
 src/delta_utils.cpp           | 57 +++++++++++++++++++----------------
 src/functions/delta_scan.cpp  |  3 --
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28ea1d2..58e3d39 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,8 +59,8 @@ endif()
 # Add rust_example as a CMake target
 ExternalProject_Add(
         ${KERNEL_NAME}
-        GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
-        GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281
+        GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs"
+        GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c
         CONFIGURE_COMMAND ""
         UPDATE_COMMAND ""
         BUILD_IN_SOURCE 1
diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index cb1d2f7..e7bf588 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -78,7 +78,8 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
 
     ## CREATE
     ## CONFIGURE USAGE OF DELETION VECTORS
-    spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);")
+    if (delete_predicate):
+        spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);")
 
     ## ADDING DELETES
     deltaTable = DeltaTable.forPath(spark, delta_table_path)
diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp
index a805d15..104d65a 100644
--- a/src/delta_utils.cpp
+++ b/src/delta_utils.cpp
@@ -200,6 +200,10 @@ static bool CanHandleFilter(TableFilter *filter) {
     switch (filter->filter_type) {
         case TableFilterType::CONSTANT_COMPARISON:
             return true;
+        case TableFilterType::IS_NULL:
+            return true;
+        case TableFilterType::IS_NOT_NULL:
+            return true;
         case TableFilterType::CONJUNCTION_AND: {
             auto &conjunction = static_cast<const ConjunctionAndFilter&>(*filter);
             bool can_handle = true;
@@ -258,28 +262,28 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co
         case LogicalType::BIGINT:
             right = visit_expression_literal_long(state, BigIntValue::Get(value));
             break;
-        // case LogicalType::INTEGER:
-        //     right = visit_expression_literal_int(state, IntegerValue::Get(value));
-        //     break;
-        // case LogicalType::SMALLINT:
-        //     right = visit_expression_literal_short(state, SmallIntValue::Get(value));
-        //     break;
-        // case LogicalType::TINYINT:
-        //     right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
-        //     break;
-        // case LogicalType::FLOAT:
-        //     right = visit_expression_literal_float(state, FloatValue::Get(value));
-        //     break;
-        // case LogicalType::DOUBLE:
-        //     right = visit_expression_literal_double(state, DoubleValue::Get(value));
-        //     break;
-        //  case LogicalType::BOOLEAN:
-        //     right = visit_expression_literal_bool(state, BooleanValue::Get(value));
-        //     break;
+        case LogicalType::INTEGER:
+            right = visit_expression_literal_int(state, IntegerValue::Get(value));
+            break;
+        case LogicalType::SMALLINT:
+            right = visit_expression_literal_short(state, SmallIntValue::Get(value));
+            break;
+        case LogicalType::TINYINT:
+            right = visit_expression_literal_byte(state, TinyIntValue::Get(value));
+            break;
+        case LogicalType::FLOAT:
+            right = visit_expression_literal_float(state, FloatValue::Get(value));
+            break;
+        case LogicalType::DOUBLE:
+            right = visit_expression_literal_double(state, DoubleValue::Get(value));
+            break;
+         case LogicalType::BOOLEAN:
+            right = visit_expression_literal_bool(state, BooleanValue::Get(value));
+            break;
         case LogicalType::VARCHAR: {
             // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str())
             auto str = StringValue::Get(value);
-            auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
+            auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str), DuckDBEngineError::AllocateError);
             right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
             break;
         }
@@ -315,6 +319,7 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc
             return 0;
         }
         auto &child_filter = *it++;
+
         return VisitFilter(col_name, *child_filter, state);
     };
     auto eit = EngineIteratorFromCallable(get_next);
@@ -322,9 +327,9 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc
 }
 
 uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
-    auto maybe_left = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
-    uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitIsNull failed to visit_expression_column");
-    return ffi::visit_expression_is_null(state, left);
+    auto maybe_inner = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
+    uintptr_t inner = KernelUtils::UnpackResult(maybe_inner, "VisitIsNull failed to visit_expression_column");
+    return ffi::visit_expression_is_null(state, inner);
 }
 
 uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) {
@@ -337,10 +342,10 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte
             return VisitConstantFilter(col_name, static_cast<const ConstantFilter&>(filter), state);
         case TableFilterType::CONJUNCTION_AND:
             return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter&>(filter), state);
-        // case TableFilterType::IS_NULL:
-        //     return VisitIsNull(col_name, state);
-        // case TableFilterType::IS_NOT_NULL:
-        //     return VisitIsNotNull(col_name, state);
+        case TableFilterType::IS_NULL:
+            return VisitIsNull(col_name, state);
+        case TableFilterType::IS_NOT_NULL:
+            return VisitIsNotNull(col_name, state);
         default:
             return ~0;
     }
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index d4320e5..ed968a2 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -31,8 +31,6 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
     StringUtil::RTrim(path_string, "/");
     path_string += "/" + KernelUtils::FromDeltaString(path);
 
-    printf("Got File %s\n", path_string.c_str());
-
     // First we append the file to our resolved files
     context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
     context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
@@ -589,7 +587,6 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance
         function.deserialize = nullptr;
         function.statistics = nullptr;
         function.table_scan_progress = nullptr;
-        function.cardinality = nullptr;
         function.get_bind_info = nullptr;
 
         // Schema param is just confusing here

From 7291aa51e970107095eefd25936e6dfc547d7610 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Sat, 15 Jun 2024 13:04:53 +0200
Subject: [PATCH 08/38] add tests for pushdown all types

---
 scripts/generate_test_data.py                 |  5 +++
 .../generated/file_skipping_all_types.test    | 44 +++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 test/sql/generated/file_skipping_all_types.test

diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index e7bf588..eaf9d30 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -116,6 +116,11 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
 query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);"
 generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part");
 
+## Partitioned table with all types we can file skip on
+for type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]:
+    query = f"CREATE table test_table as select i::{type} as value, i::{type} as part from range(0,2) tbl(i)"
+    generate_test_data_delta_rs(f"test_file_skipping/{type}", query, "part");
+
 ## Simple table with deletion vector
 con = duckdb.connect()
 con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value  FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'")
diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test
new file mode 100644
index 0000000..e4348e8
--- /dev/null
+++ b/test/sql/generated/file_skipping_all_types.test
@@ -0,0 +1,44 @@
+# name: test/sql/generated/file_skipping_all_types.test
+# description: Test filter pushdown succeeds on all file types we can push down
+# group: [delta_generated]
+
+require parquet
+
+require delta
+
+require-env GENERATED_DATA_AVAILABLE
+
+# TODO: this doesn't appear to skip files yet
+# TODO: add tests once https://github.com/duckdb/duckdb/pull/12488 is available
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/bool/delta_lake')
+where part != false
+order by value
+----
+true
+
+foreach type bool int tinyint smallint bigint varchar
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
+where part != 0
+order by value
+----
+1
+
+endloop
+
+foreach type float double
+
+query I
+select value
+from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
+where part > 0.5
+order by value
+----
+1.0
+
+endloop

From 638292f1888f0e5911a65aaa34ac0163bbde1f36 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Mon, 17 Jun 2024 14:49:35 +0200
Subject: [PATCH 09/38] add linux arm64 builds

---
 .../workflows/MainDistributionPipeline.yml    |  4 +--
 .github/workflows/_extension_distribution.yml |  8 ++++-
 CMakeLists.txt                                | 29 ++++++++++++++++---
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index d0d7916..40ff65e 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -18,7 +18,7 @@ jobs:
     with:
       duckdb_version: v1.0.0
       extension_name: delta
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
@@ -28,5 +28,5 @@ jobs:
     with:
       extension_name: delta
       duckdb_version: v1.0.0
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
       deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
\ No newline at end of file
diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml
index b20bbb8..f38506e 100644
--- a/.github/workflows/_extension_distribution.yml
+++ b/.github/workflows/_extension_distribution.yml
@@ -140,9 +140,15 @@ jobs:
           ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl
 
       - name: Setup Rust
-        if: ${{ matrix.duckdb_arch == 'linux_amd64' }}
+        if: ${{ matrix.duckdb_arch == 'linux_amd64'}}
         uses: dtolnay/rust-toolchain@stable
 
+      - name: Setup Rust for cross compilation
+        if: ${{ matrix.duckdb_arch == 'linux_arm64'}}
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: aarch64-unknown-linux-gnu
+
       - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
         if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8d12ec..3000a81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,11 +35,19 @@ set(KERNEL_NAME delta_kernel)
 # Set default ExternalProject root directory
 set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust)
 
+set(RUST_ENV_VARS "")
+
 # Propagate arch to rust build for CI
 set(RUST_PLATFORM_TARGET "")
 if("${OS_NAME}" STREQUAL "linux")
     if ("${OS_ARCH}" STREQUAL "arm64")
         set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
+    elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64")
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include)
+        set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
     else()
         set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
     endif()
@@ -56,20 +64,33 @@ elseif("${OS_NAME}" STREQUAL "osx")
     endif()
 endif()
 
+string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS)
+
+# Having these set will mess up cross compilation to linux arm
+set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD)
+
 # Add rust_example as a CMake target
 ExternalProject_Add(
         ${KERNEL_NAME}
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
         GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8
-        CONFIGURE_COMMAND ""
+        # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
+        # through CMake is an error-prone mess
+        CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env
         UPDATE_COMMAND ""
         BUILD_IN_SOURCE 1
         # Build debug build
-        BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET}
+        BUILD_COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET}
         # Build release build
-        COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET}
+        COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET}
         # Build DATs
-        COMMAND cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
+        COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h"

From aa60e1d8a7411b890f0ee914f59b29a0d15f8f6d Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Mon, 17 Jun 2024 14:49:35 +0200
Subject: [PATCH 10/38] add linux arm64 builds

---
 .../workflows/MainDistributionPipeline.yml    |  4 +--
 .github/workflows/_extension_distribution.yml |  8 ++++-
 CMakeLists.txt                                | 29 ++++++++++++++++---
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index d0d7916..40ff65e 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -18,7 +18,7 @@ jobs:
     with:
       duckdb_version: v1.0.0
       extension_name: delta
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
@@ -28,5 +28,5 @@ jobs:
     with:
       extension_name: delta
       duckdb_version: v1.0.0
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
       deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
\ No newline at end of file
diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml
index b20bbb8..f38506e 100644
--- a/.github/workflows/_extension_distribution.yml
+++ b/.github/workflows/_extension_distribution.yml
@@ -140,9 +140,15 @@ jobs:
           ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl
 
       - name: Setup Rust
-        if: ${{ matrix.duckdb_arch == 'linux_amd64' }}
+        if: ${{ matrix.duckdb_arch == 'linux_amd64'}}
         uses: dtolnay/rust-toolchain@stable
 
+      - name: Setup Rust for cross compilation
+        if: ${{ matrix.duckdb_arch == 'linux_arm64'}}
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: aarch64-unknown-linux-gnu
+
       - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
         if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 58e3d39..5484048 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,11 +35,19 @@ set(KERNEL_NAME delta_kernel)
 # Set default ExternalProject root directory
 set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust)
 
+set(RUST_ENV_VARS "")
+
 # Propagate arch to rust build for CI
 set(RUST_PLATFORM_TARGET "")
 if("${OS_NAME}" STREQUAL "linux")
     if ("${OS_ARCH}" STREQUAL "arm64")
         set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
+    elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64")
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib)
+        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include)
+        set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
     else()
         set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
     endif()
@@ -56,20 +64,33 @@ elseif("${OS_NAME}" STREQUAL "osx")
     endif()
 endif()
 
+string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS)
+
+# Having these set will mess up cross compilation to linux arm
+set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD)
+
 # Add rust_example as a CMake target
 ExternalProject_Add(
         ${KERNEL_NAME}
         GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs"
         GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c
-        CONFIGURE_COMMAND ""
+        # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
+        # through CMake is an error-prone mess
+        CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env
         UPDATE_COMMAND ""
         BUILD_IN_SOURCE 1
         # Build debug build
-        BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET}
+        BUILD_COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET}
         # Build release build
-        COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET}
+        COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET}
         # Build DATs
-        COMMAND cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
+        COMMAND
+            ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
+            cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
         BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h"

From deff1b967ae722522f03efe183c239928abfd274 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 19 Jun 2024 22:49:29 +0200
Subject: [PATCH 11/38] added windows archs, making things ugly in the process

---
 .../workflows/MainDistributionPipeline.yml    |   4 +-
 .github/workflows/_extension_distribution.yml |  18 +-
 CMakeLists.txt                                |  51 +-
 src/delta_utils.cpp                           |  10 +-
 src/include/delta_kernel_ffi.hpp              | 537 ++++++++++++++++++
 5 files changed, 591 insertions(+), 29 deletions(-)
 create mode 100644 src/include/delta_kernel_ffi.hpp

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index 40ff65e..abdc095 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -18,7 +18,7 @@ jobs:
     with:
       duckdb_version: v1.0.0
       extension_name: delta
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools'
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
@@ -28,5 +28,5 @@ jobs:
     with:
       extension_name: delta
       duckdb_version: v1.0.0
-      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64'
+      exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools'
       deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
\ No newline at end of file
diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml
index f38506e..a536982 100644
--- a/.github/workflows/_extension_distribution.yml
+++ b/.github/workflows/_extension_distribution.yml
@@ -317,6 +317,9 @@ jobs:
         with:
           python-version: '3.11'
 
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
       - uses: r-lib/actions/setup-r@v2
         if: matrix.duckdb_arch == 'windows_amd64_rtools'
         with:
@@ -340,16 +343,6 @@ jobs:
         with:
           vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}
 
-      - name: Fix for MSVC issue
-        shell: bash
-        env:
-          OVERLAY_TRIPLET_SRC:  ${{ github.workspace }}/vcpkg/triplets/community/x64-windows-static-md.cmake
-          OVERLAY_TRIPLET_DST:  ${{ github.workspace }}/overlay_triplets/x64-windows-static-md.cmake
-        run: |
-          mkdir overlay_triplets
-          cp $OVERLAY_TRIPLET_SRC $OVERLAY_TRIPLET_DST
-          echo "set(VCPKG_PLATFORM_TOOLSET_VERSION "14.38")" >> $OVERLAY_TRIPLET_DST
-
       - name: Build & test extension
         env:
           VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets"
@@ -357,6 +350,11 @@ jobs:
         run: |
           make test_release
 
+      - name: Error log
+        if: always()
+        run: |
+          cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log
+
       - uses: actions/upload-artifact@v2
         with:
           name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3000a81..41ace4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,7 @@ if(APPLE)
 elseif(UNIX)
     set(PLATFORM_LIBS m c resolv)
 elseif(WIN32)
-    set(PLATFORM_LIBS ws2_32 userenv advapi32)
+    set(PLATFORM_LIBS ntdll ncrypt secur32 ws2_32 userenv bcrypt msvcrt advapi32)
 else()
     message(STATUS "UNKNOWN OS")
 endif()
@@ -52,27 +52,53 @@ if("${OS_NAME}" STREQUAL "linux")
         set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
     endif()
 elseif("${OS_NAME}" STREQUAL "osx")
-    # TODO: clean up upstream; we are not correctly setting OS_ARCH for cross compile
     if ("${OSX_BUILD_ARCH}" STREQUAL "arm64")
         set(RUST_PLATFORM_TARGET "aarch64-apple-darwin")
     elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64")
         set(RUST_PLATFORM_TARGET "x86_64-apple-darwin")
     elseif ("${OS_ARCH}" STREQUAL "arm64")
         set(RUST_PLATFORM_TARGET "aarch64-apple-darwin")
-    else()
-        set(RUST_PLATFORM_TARGET "x86_64-apple-darwin")
+    endif()
+elseif(WIN32)
+    if (MINGW AND "${OS_ARCH}" STREQUAL "arm64")
+        set(RUST_PLATFORM_TARGET "aarch64-pc-windows-gnu")
+    elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64")
+        set(RUST_PLATFORM_TARGET "x86_64-pc-windows-gnu")
+    elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64")
+        set(RUST_PLATFORM_TARGET "aarch64-pc-windows-msvc")
+    elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64")
+        set(RUST_PLATFORM_TARGET "x86_64-pc-windows-msvc")
     endif()
 endif()
 
+# We currently only support the predefined targets.
+if ("${RUST_PLATFORM_TARGET}" STREQUAL "")
+    message(FATAL_ERROR "Failed to detect the correct platform")
+endif()
+
+set(RUST_PLATFORM_PARAM "--target=${RUST_PLATFORM_TARGET}")
+message(STATUS "Building for rust target: ${RUST_PLATFORM_TARGET}")
+
+# Remove whitespaces before and after to prevent messed up env variables
 string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS)
 
 # Having these set will mess up cross compilation to linux arm
 set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD)
 
+# Define all the relevant delta-kernel-rs paths/names
+set(DELTA_KERNEL_LIBNAME "${CMAKE_STATIC_LIBRARY_PREFIX}delta_kernel_ffi${CMAKE_STATIC_LIBRARY_SUFFIX}")
+set(DELTA_KERNEL_LIBPATH_DEBUG "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/${DELTA_KERNEL_LIBNAME}")
+set(DELTA_KERNEL_LIBPATH_RELEASE "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/${DELTA_KERNEL_LIBNAME}")
+set(DELTA_KERNEL_FFI_HEADER_PATH "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers")
+set(DELTA_KERNEL_FFI_HEADER_C "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h")
+set(DELTA_KERNEL_FFI_HEADER_CXX "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp")
+
 # Add rust_example as a CMake target
 ExternalProject_Add(
         ${KERNEL_NAME}
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
+        # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping
+        # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix
         GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8
         # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
         # through CMake is an error-prone mess
@@ -82,27 +108,28 @@ ExternalProject_Add(
         # Build debug build
         BUILD_COMMAND
             ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
-            cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET}
+            cargo build --package delta_kernel_ffi --workspace --all-features ${RUST_PLATFORM_PARAM}
         # Build release build
         COMMAND
             ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
-            cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET}
+            cargo build --package delta_kernel_ffi --workspace --all-features --release ${RUST_PLATFORM_PARAM}
         # Build DATs
         COMMAND
             ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS}
             cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
-        BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
-        BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
-        BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h"
-        BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp"
+        # Define the byproducts, required for building with Ninja
+        BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_DEBUG}"
+        BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_RELEASE}"
+        BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_C}"
+        BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_CXX}"
         INSTALL_COMMAND ""
         LOG_BUILD ON)
 
 build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
 build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
 
-include_directories(${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers)
-include_directories(${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers)
+# TODO: when C linkage issue is resolved, we should switch back to using the generated headers
+#include_directories(${DELTA_KERNEL_FFI_HEADER_PATH})
 
 # Hides annoying linker warnings
 set(CMAKE_OSX_DEPLOYMENT_TARGET 13.3 CACHE STRING "Minimum OS X deployment version" FORCE)
diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp
index b02e898..57d34d0 100644
--- a/src/delta_utils.cpp
+++ b/src/delta_utils.cpp
@@ -169,10 +169,10 @@ vector<bool> KernelUtils::FromDeltaBoolSlice(const struct ffi::KernelBoolSlice s
     return result;
 }
 
-PredicateVisitor::PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters) : EnginePredicate {
-        .predicate = this,
-        .visitor = (uintptr_t (*)(void*, ffi::KernelExpressionVisitorState*)) &VisitPredicate}
-{
+PredicateVisitor::PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters) {
+    predicate = this;
+    visitor = (uintptr_t (*)(void*, ffi::KernelExpressionVisitorState*)) &VisitPredicate;
+
     if (filters) {
         for (auto& filter : filters->filters) {
             column_filters[column_names[filter.first]] = filter.second.get();
@@ -190,7 +190,7 @@ static auto GetNextFromCallable(Callable* callable) -> decltype(std::declval<Cal
 template <typename Callable>
 ffi::EngineIterator EngineIteratorFromCallable(Callable& callable) {
     auto* get_next = &GetNextFromCallable<Callable>;
-    return {.data = &callable, .get_next = (const void *(*)(void*)) get_next};
+    return {&callable, (const void *(*)(void*)) get_next};
 };
 
 // Helper function to prevent pushing down filters kernel cant handle
diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
new file mode 100644
index 0000000..813d31e
--- /dev/null
+++ b/src/include/delta_kernel_ffi.hpp
@@ -0,0 +1,537 @@
+#pragma once
+
+#include <cstdarg>
+#include <cstdint>
+#include <cstdlib>
+#include <ostream>
+#include <new>
+
+namespace ffi {
+
+enum class KernelError {
+  UnknownError,
+  FFIError,
+#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE))
+  ArrowError,
+#endif
+  EngineDataTypeError,
+  ExtractError,
+  GenericError,
+  IOErrorError,
+#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE))
+  ParquetError,
+#endif
+#if defined(DEFINE_DEFAULT_ENGINE)
+  ObjectStoreError,
+#endif
+#if defined(DEFINE_DEFAULT_ENGINE)
+  ObjectStorePathError,
+#endif
+#if defined(DEFINE_DEFAULT_ENGINE)
+  Reqwest,
+#endif
+  FileNotFoundError,
+  MissingColumnError,
+  UnexpectedColumnTypeError,
+  MissingDataError,
+  MissingVersionError,
+  DeletionVectorError,
+  InvalidUrlError,
+  MalformedJsonError,
+  MissingMetadataError,
+  MissingProtocolError,
+  MissingMetadataAndProtocolError,
+  ParseError,
+  JoinFailureError,
+  Utf8Error,
+  ParseIntError,
+  InvalidColumnMappingMode,
+  InvalidTableLocation,
+  InvalidDecimalError,
+};
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Struct to allow binding to the arrow [C Data
+/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and
+/// the schema.
+struct ArrowFFIData;
+#endif
+
+struct CStringMap;
+
+/// this struct can be used by an engine to materialize a selection vector
+struct DvInfo;
+
+#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE))
+/// A builder that allows setting options on the `Engine` before actually building it
+struct EngineBuilder;
+#endif
+
+/// an opaque struct that encapsulates data read by an engine. this handle can be passed back into
+/// some kernel calls to operate on the data, or can be converted into the raw data as read by the
+/// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`]
+struct EngineData;
+
+struct KernelExpressionVisitorState;
+
+struct SharedExternEngine;
+
+struct SharedGlobalScanState;
+
+struct SharedScan;
+
+struct SharedScanDataIterator;
+
+struct SharedSnapshot;
+
+/// Represents an owned slice of boolean values allocated by the kernel. Any time the engine
+/// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible
+/// to free that slice, by calling [super::drop_bool_slice] exactly once.
+struct KernelBoolSlice {
+  bool *ptr;
+  uintptr_t len;
+};
+
+/// An error that can be returned to the engine. Engines that wish to associate additional
+/// information can define and use any type that is [pointer
+/// interconvertible](https://en.cppreference.com/w/cpp/language/static_cast#pointer-interconvertible)
+/// with this one -- e.g. by subclassing this struct or by embedding this struct as the first member
+/// of a [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard-layout)
+/// class.
+struct EngineError {
+  KernelError etype;
+};
+
+/// Semantics: Kernel will always immediately return the leaked engine error to the engine (if it
+/// allocated one at all), and engine is responsible for freeing it.
+template<typename T>
+struct ExternResult {
+  enum class Tag {
+    Ok,
+    Err,
+  };
+
+  struct Ok_Body {
+    T _0;
+  };
+
+  struct Err_Body {
+    EngineError *_0;
+  };
+
+  Tag tag;
+  union {
+    Ok_Body ok;
+    Err_Body err;
+  };
+};
+
+/// A non-owned slice of a UTF8 string, intended for arg-passing between kernel and engine. The
+/// slice is only valid until the function it was passed into returns, and should not be copied.
+///
+/// # Safety
+///
+/// Intentionally not Copy, Clone, Send, nor Sync.
+///
+/// Whoever instantiates the struct must ensure it does not outlive the data it points to. The
+/// compiler cannot help us here, because raw pointers don't have lifetimes. To reduce the risk of
+/// accidental misuse, it is recommended to only instantiate this struct as a function arg, by
+/// converting a string slice `Into` a `KernelStringSlice`. That way, the borrowed reference at call
+/// site protects the `KernelStringSlice` until the function returns. Meanwhile, the callee should
+/// assume that the slice is only valid until the function returns, and must not retain any
+/// references to the slice or its data that could outlive the function call.
+///
+/// ```
+/// # use delta_kernel_ffi::KernelStringSlice;
+/// fn wants_slice(slice: KernelStringSlice) { }
+/// let msg = String::from("hello");
+/// wants_slice(msg.into());
+/// ```
+struct KernelStringSlice {
+  const char *ptr;
+  uintptr_t len;
+};
+
+using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg);
+
+/// Represents an object that crosses the FFI boundary and which outlives the scope that created
+/// it. It can be passed freely between rust code and external code. The
+///
+/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type:
+///
+/// * The true underlying ("target") type the handle represents. For safety reasons, target type
+/// must always be [`Send`].
+///
+/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a
+/// shared handle must always be [`Send`]+[`Sync`].
+///
+/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently.
+///
+/// # Validity
+///
+/// A `Handle` is _valid_ if all of the following hold:
+///
+/// * It was created by a call to [`Handle::from`]
+/// * Not yet dropped by a call to [`Handle::drop_handle`]
+/// * Not yet consumed by a call to [`Handle::into_inner`]
+///
+/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must
+/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel
+/// API call at a time. If thread races are possible, the handle should be protected with a
+/// mutex. Due to Rust [reference
+/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references),
+/// this requirement applies even for API calls that appear to be read-only (because Rust code
+/// always receives the handle as mutable).
+///
+/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can
+/// freely access shared (non-mutable) handles.
+///
+template<typename H>
+using Handle = H*;
+
+/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own
+/// representation of a schema from a particular schema within kernel.
+///
+/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a
+/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier
+/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the
+/// future.
+///
+/// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema
+/// itself is a list of schema elements, and every complex type (struct, map, array) contains a list
+/// of "child" elements.
+///  1. Before visiting schema or any complex type, the kernel asks the engine to allocate a list to
+///     hold its children
+///  2. When visiting any schema element, the kernel passes its parent's "child list" as the
+///     "sibling list" the element should be appended to:
+///      - For the top-level schema, visit each top-level column, passing the column's name and type
+///      - For a struct, first visit each struct field, passing the field's name, type, nullability,
+///        and metadata
+///      - For a map, visit the key and value, passing its special name ("map_key" or "map_value"),
+///        type, and value nullability (keys are never nullable)
+///      - For a list, visit the element, passing its special name ("array_element"), type, and
+///        nullability
+///  3. When visiting a complex schema element, the kernel also passes the "child list" containing
+///     that element's (already-visited) children.
+///  4. The [`visit_schema`] method returns the id of the list of top-level columns
+struct EngineSchemaVisitor {
+  /// opaque state pointer
+  void *data;
+  /// Creates a new field list, optionally reserving capacity up front
+  uintptr_t (*make_field_list)(void *data, uintptr_t reserve);
+  /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a
+  /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`.
+  void (*visit_struct)(void *data,
+                       uintptr_t sibling_list_id,
+                       KernelStringSlice name,
+                       uintptr_t child_list_id);
+  /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list
+  /// with the array's element type
+  void (*visit_array)(void *data,
+                      uintptr_t sibling_list_id,
+                      KernelStringSlice name,
+                      bool contains_null,
+                      uintptr_t child_list_id);
+  /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list
+  /// where the first element is the map's key type and the second element is the
+  /// map's value type
+  void (*visit_map)(void *data,
+                    uintptr_t sibling_list_id,
+                    KernelStringSlice name,
+                    bool value_contains_null,
+                    uintptr_t child_list_id);
+  /// visit a `decimal` with the specified `precision` and `scale`
+  void (*visit_decimal)(void *data,
+                        uintptr_t sibling_list_id,
+                        KernelStringSlice name,
+                        uint8_t precision,
+                        uint8_t scale);
+  /// Visit a `string` belonging to the list identified by `sibling_list_id`.
+  void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `long` belonging to the list identified by `sibling_list_id`.
+  void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit an `integer` belonging to the list identified by `sibling_list_id`.
+  void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `short` belonging to the list identified by `sibling_list_id`.
+  void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `byte` belonging to the list identified by `sibling_list_id`.
+  void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `float` belonging to the list identified by `sibling_list_id`.
+  void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `double` belonging to the list identified by `sibling_list_id`.
+  void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `boolean` belonging to the list identified by `sibling_list_id`.
+  void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit `binary` belonging to the list identified by `sibling_list_id`.
+  void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `date` belonging to the list identified by `sibling_list_id`.
+  void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`.
+  void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+  /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`.
+  void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name);
+};
+
+/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap
+/// the engine functions. The engine retains ownership of the iterator.
+struct EngineIterator {
+  void *data;
+  /// A function that should advance the iterator and return the next time from the data
+  /// If the iterator is complete, it should return null. It should be safe to
+  /// call `get_next()` multiple times if it returns null.
+  const void *(*get_next)(void *data);
+};
+
+/// A predicate that can be used to skip data when scanning.
+///
+/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate,
+/// along with a visitor function that can be invoked to recursively visit the predicate. This
+/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the
+/// kernel allocates visitor state, which becomes the second argument to the predicate visitor
+/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the
+/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and
+/// kernel each retain ownership of their respective objects, with no need to coordinate memory
+/// lifetimes with the other.
+struct EnginePredicate {
+  void *predicate;
+  uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state);
+};
+
+using NullableCvoid = void*;
+
+/// Allow engines to allocate strings of their own type. the contract of calling a passed allocate
+/// function is that `kernel_str` is _only_ valid until the return from this function
+using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str);
+
+using CScanCallback = void(*)(NullableCvoid engine_context,
+                              KernelStringSlice path,
+                              int64_t size,
+                              const DvInfo *dv_info,
+                              const CStringMap *partition_map);
+
+// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163
+struct im_an_unused_struct_that_tricks_msvc_into_compilation {
+    ExternResult<KernelBoolSlice> field;
+    ExternResult<bool> field2;
+    ExternResult<EngineBuilder*> field3;
+    ExternResult<Handle<SharedExternEngine>> field4;
+    ExternResult<Handle<SharedSnapshot>> field5;
+    ExternResult<uintptr_t> field6;
+    ExternResult<ArrowFFIData*> field7;
+    ExternResult<Handle<SharedScanDataIterator>> field8;
+    ExternResult<Handle<SharedScan>> field9;
+    ExternResult<Handle<SharedScan>> field10;
+};
+
+
+extern "C" {
+
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle.
+void drop_bool_slice(KernelBoolSlice slice);
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Get a "builder" that can be used to construct an engine. The function
+/// [`set_builder_option`] can be used to set options on the builder prior to constructing the
+/// actual engine
+///
+/// # Safety
+/// Caller is responsible for passing a valid path pointer.
+ExternResult<EngineBuilder*> get_engine_builder(KernelStringSlice path,
+                                                AllocateErrorFn allocate_error);
+#endif
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Set an option on the builder
+///
+/// # Safety
+///
+/// Caller must pass a valid EngineBuilder pointer, and valid slices for key and value
+void set_builder_option(EngineBuilder *builder, KernelStringSlice key, KernelStringSlice value);
+#endif
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Consume the builder and return an engine. After calling, the passed pointer is _no
+/// longer valid_.
+///
+/// # Safety
+///
+/// Caller is responsible to pass a valid EngineBuilder pointer, and to not use it again afterwards
+ExternResult<Handle<SharedExternEngine>> builder_build(EngineBuilder *builder);
+#endif
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// # Safety
+///
+/// Caller is responsible for passing a valid path pointer.
+ExternResult<Handle<SharedExternEngine>> get_default_engine(KernelStringSlice path,
+                                                            AllocateErrorFn allocate_error);
+#endif
+
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle.
+void drop_engine(Handle<SharedExternEngine> engine);
+
+/// Get the latest snapshot from the specified table
+///
+/// # Safety
+///
+/// Caller is responsible for passing valid handles and path pointer.
+ExternResult<Handle<SharedSnapshot>> snapshot(KernelStringSlice path,
+                                              Handle<SharedExternEngine> engine);
+
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle.
+void drop_snapshot(Handle<SharedSnapshot> snapshot);
+
+/// Get the version of the specified snapshot
+///
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle.
+uint64_t version(Handle<SharedSnapshot> snapshot);
+
+/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the
+/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works.
+///
+/// This method returns the id of the list allocated to hold the top level schema columns.
+///
+/// # Safety
+///
+/// Caller is responsible for passing a valid snapshot handle and schema visitor.
+uintptr_t visit_schema(Handle<SharedSnapshot> snapshot, EngineSchemaVisitor *visitor);
+
+uintptr_t visit_expression_and(KernelExpressionVisitorState *state, EngineIterator *children);
+
+uintptr_t visit_expression_lt(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b);
+
+uintptr_t visit_expression_le(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b);
+
+uintptr_t visit_expression_gt(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b);
+
+uintptr_t visit_expression_ge(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b);
+
+uintptr_t visit_expression_eq(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b);
+
+/// # Safety
+/// The string slice must be valid
+ExternResult<uintptr_t> visit_expression_column(KernelExpressionVisitorState *state,
+                                                KernelStringSlice name,
+                                                AllocateErrorFn allocate_error);
+
+/// # Safety
+/// The string slice must be valid
+ExternResult<uintptr_t> visit_expression_literal_string(KernelExpressionVisitorState *state,
+                                                        KernelStringSlice value,
+                                                        AllocateErrorFn allocate_error);
+
+uintptr_t visit_expression_literal_long(KernelExpressionVisitorState *state, int64_t value);
+
+/// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants
+/// to use its own engine data format
+///
+/// # Safety
+///
+/// `data_handle` must be a valid pointer to a kernel allocated `EngineData`. The Engine must
+/// ensure the handle outlives the returned pointer.
+void *get_raw_engine_data(Handle<EngineData> data);
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data
+/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and
+/// the schema.
+///
+/// # Safety
+/// data_handle must be a valid EngineData as read by the
+/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`.
+ExternResult<ArrowFFIData*> get_raw_arrow_data(Handle<EngineData> data,
+                                               Handle<SharedExternEngine> engine);
+#endif
+
+/// Drops a scan.
+/// # Safety
+/// Caller is responsible for passing a [valid][Handle#Validity] scan handle.
+void drop_scan(Handle<SharedScan> scan);
+
+/// Get a [`Scan`] over the table specified by the passed snapshot.
+/// # Safety
+///
+/// Caller is responsible for passing a valid snapshot pointer, and engine pointer
+ExternResult<Handle<SharedScan>> scan(Handle<SharedSnapshot> snapshot,
+                                      Handle<SharedExternEngine> engine,
+                                      EnginePredicate *predicate);
+
+/// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`]
+/// for more information.
+///
+/// # Safety
+/// Engine is responsible for providing a valid scan pointer
+Handle<SharedGlobalScanState> get_global_scan_state(Handle<SharedScan> scan);
+
+/// # Safety
+///
+/// Caller is responsible for passing a valid global scan pointer.
+void drop_global_scan_state(Handle<SharedGlobalScanState> state);
+
+/// Get an iterator over the data needed to perform a scan. This will return a
+/// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual
+/// data in the iterator.
+///
+/// # Safety
+///
+/// Engine is responsible for passing a valid [`SharedExternEngine`] and [`SharedScan`]
+ExternResult<Handle<SharedScanDataIterator>> kernel_scan_data_init(Handle<SharedExternEngine> engine,
+                                                                   Handle<SharedScan> scan);
+
+/// # Safety
+///
+/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by
+/// [kernel_scan_data_free]. The visitor function pointer must be non-null.
+ExternResult<bool> kernel_scan_data_next(Handle<SharedScanDataIterator> data,
+                                         NullableCvoid engine_context,
+                                         void (*engine_visitor)(NullableCvoid engine_context,
+                                                                Handle<EngineData> engine_data,
+                                                                KernelBoolSlice selection_vector));
+
+/// # Safety
+///
+/// Caller is responsible for (at most once) passing a valid pointer returned by a call to
+/// [`kernel_scan_data_init`].
+void kernel_scan_data_free(Handle<SharedScanDataIterator> data);
+
+/// allow probing into a CStringMap. If the specified key is in the map, kernel will call
+/// allocate_fn with the value associated with the key and return the value returned from that
+/// function. If the key is not in the map, this will return NULL
+///
+/// # Safety
+///
+/// The engine is responsible for providing a valid [`CStringMap`] pointer and [`KernelStringSlice`]
+NullableCvoid get_from_map(const CStringMap *map,
+                           KernelStringSlice key,
+                           AllocateStringFn allocate_fn);
+
+/// Get a selection vector out of a [`DvInfo`] struct
+///
+/// # Safety
+/// Engine is responsible for providing valid pointers for each argument
+ExternResult<KernelBoolSlice> selection_vector_from_dv(const DvInfo *dv_info,
+                                                       Handle<SharedExternEngine> engine,
+                                                       Handle<SharedGlobalScanState> state);
+
+/// Shim for ffi to call visit_scan_data. This will generally be called when iterating through scan
+/// data which provides the data handle and selection vector as each element in the iterator.
+///
+/// # Safety
+/// engine is responsbile for passing a valid [`EngineData`] and selection vector.
+void visit_scan_data(Handle<EngineData> data,
+                     KernelBoolSlice selection_vec,
+                     NullableCvoid engine_context,
+                     CScanCallback callback);
+
+} // extern "C"
+
+
+} // namespace ffi

From 6c9dc2163954cfa689fdd654533078352a684f11 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 19 Jun 2024 23:20:04 +0200
Subject: [PATCH 12/38] update inlined ffi header

---
 src/include/delta_kernel_ffi.hpp | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
index 813d31e..8c56f74 100644
--- a/src/include/delta_kernel_ffi.hpp
+++ b/src/include/delta_kernel_ffi.hpp
@@ -48,6 +48,7 @@ enum class KernelError {
   InvalidColumnMappingMode,
   InvalidTableLocation,
   InvalidDecimalError,
+  InvalidStructData,
 };
 
 #if defined(DEFINE_DEFAULT_ENGINE)
@@ -62,7 +63,7 @@ struct CStringMap;
 /// this struct can be used by an engine to materialize a selection vector
 struct DvInfo;
 
-#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE))
+#if defined(DEFINE_DEFAULT_ENGINE)
 /// A builder that allows setting options on the `Engine` before actually building it
 struct EngineBuilder;
 #endif
@@ -352,9 +353,10 @@ void set_builder_option(EngineBuilder *builder, KernelStringSlice key, KernelStr
 #endif
 
 #if defined(DEFINE_DEFAULT_ENGINE)
-/// Consume the builder and return an engine. After calling, the passed pointer is _no
+/// Consume the builder and return a `default` engine. After calling, the passed pointer is _no
 /// longer valid_.
 ///
+///
 /// # Safety
 ///
 /// Caller is responsible to pass a valid EngineBuilder pointer, and to not use it again afterwards
@@ -369,6 +371,13 @@ ExternResult<Handle<SharedExternEngine>> get_default_engine(KernelStringSlice pa
                                                             AllocateErrorFn allocate_error);
 #endif
 
+#if defined(DEFINE_SYNC_ENGINE)
+/// # Safety
+///
+/// Caller is responsible for passing a valid path pointer.
+ExternResult<Handle<SharedExternEngine>> get_sync_engine(AllocateErrorFn allocate_error);
+#endif
+
 /// # Safety
 ///
 /// Caller is responsible for passing a valid handle.
@@ -422,14 +431,30 @@ ExternResult<uintptr_t> visit_expression_column(KernelExpressionVisitorState *st
                                                 KernelStringSlice name,
                                                 AllocateErrorFn allocate_error);
 
+uintptr_t visit_expression_not(KernelExpressionVisitorState *state, uintptr_t inner_expr);
+
+uintptr_t visit_expression_is_null(KernelExpressionVisitorState *state, uintptr_t inner_expr);
+
 /// # Safety
 /// The string slice must be valid
 ExternResult<uintptr_t> visit_expression_literal_string(KernelExpressionVisitorState *state,
                                                         KernelStringSlice value,
                                                         AllocateErrorFn allocate_error);
 
+uintptr_t visit_expression_literal_int(KernelExpressionVisitorState *state, int32_t value);
+
 uintptr_t visit_expression_literal_long(KernelExpressionVisitorState *state, int64_t value);
 
+uintptr_t visit_expression_literal_short(KernelExpressionVisitorState *state, int16_t value);
+
+uintptr_t visit_expression_literal_byte(KernelExpressionVisitorState *state, int8_t value);
+
+uintptr_t visit_expression_literal_float(KernelExpressionVisitorState *state, float value);
+
+uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, double value);
+
+uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value);
+
 /// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants
 /// to use its own engine data format
 ///
@@ -533,5 +558,4 @@ void visit_scan_data(Handle<EngineData> data,
 
 } // extern "C"
 
-
 } // namespace ffi

From 76f832b938e4cb5e8eec2a66772772c18ab087bc Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 19 Jun 2024 23:44:16 +0200
Subject: [PATCH 13/38] remove accidentally duplicated condition

---
 CMakeLists.txt | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c3d259c..54b84b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,12 +48,6 @@ if("${OS_NAME}" STREQUAL "linux")
         set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib)
         set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include)
         set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
-    elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64")
-        set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu)
-        set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc)
-        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib)
-        set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include)
-        set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
     else()
         set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
     endif()

From 7229c0ceb616e14688c70f9de1a7c1faa24fb5fd Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 20 Jun 2024 10:26:03 +0200
Subject: [PATCH 14/38] fix another small merge issue

---
 CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 54b84b4..390f876 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,15 +139,15 @@ add_compile_definitions(DEFINE_DEFAULT_ENGINE)
 
 # Link delta-kernal-rs to static lib
 target_link_libraries(${EXTENSION_NAME}
-        debug "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
-        optimized "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
+        debug ${DELTA_KERNEL_LIBPATH_DEBUG}
+        optimized ${DELTA_KERNEL_LIBPATH_RELEASE}
         ${PLATFORM_LIBS})
 add_dependencies(${EXTENSION_NAME} delta_kernel)
 
 # Link delta-kernal-rs to dynamic lib
 target_link_libraries(${LOADABLE_EXTENSION_NAME}
-        debug "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
-        optimized "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
+        debug ${DELTA_KERNEL_LIBPATH_DEBUG}
+        optimized ${DELTA_KERNEL_LIBPATH_RELEASE}
         ${PLATFORM_LIBS})
 add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel)
 

From 5cb6f1dc87317b5c87bb9177c99a75c0c94df8ec Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 20 Jun 2024 16:29:03 +0200
Subject: [PATCH 15/38] bump kernel

---
 CMakeLists.txt                   | 4 ++--
 src/include/delta_kernel_ffi.hpp | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 390f876..1f94ea9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,10 +96,10 @@ set(DELTA_KERNEL_FFI_HEADER_CXX "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/targe
 # Add rust_example as a CMake target
 ExternalProject_Add(
         ${KERNEL_NAME}
-        GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs"
+        GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
         # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping
         # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix
-        GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c
+        GIT_TAG 6f95fd3bfaaa57698d72f539f8c6a0475a52c4e7
         # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
         # through CMake is an error-prone mess
         CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env
diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
index 8c56f74..12833be 100644
--- a/src/include/delta_kernel_ffi.hpp
+++ b/src/include/delta_kernel_ffi.hpp
@@ -28,7 +28,7 @@ enum class KernelError {
   ObjectStorePathError,
 #endif
 #if defined(DEFINE_DEFAULT_ENGINE)
-  Reqwest,
+  ReqwestError,
 #endif
   FileNotFoundError,
   MissingColumnError,
@@ -45,10 +45,10 @@ enum class KernelError {
   JoinFailureError,
   Utf8Error,
   ParseIntError,
-  InvalidColumnMappingMode,
-  InvalidTableLocation,
+  InvalidColumnMappingModeError,
+  InvalidTableLocationError,
   InvalidDecimalError,
-  InvalidStructData,
+  InvalidStructDataError,
 };
 
 #if defined(DEFINE_DEFAULT_ENGINE)

From c1f44a31c092ef5907f336dec0d9ef6ca3a983b9 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 20 Jun 2024 18:16:45 +0200
Subject: [PATCH 16/38] apply workaround for when partition values are NULL

---
 src/functions/delta_scan.cpp                  | 68 ++++++++++++++++++-
 test/sql/dat/all.test                         | 14 ++--
 .../delta_kernel_rs/basic_partitioned.test    | 12 ++--
 3 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index ed968a2..1065a7e 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -467,13 +467,79 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
     return std::move(res);
 }
 
+// This code is duplicated from MultiFileReader::CreateNameMapping the difference is that for columns that are not found
+// in the parquet files, we just add null constant columns
+static void CustomMulfiFileNameMapping(const string &file_name, const vector<LogicalType> &local_types,
+                                        const vector<string> &local_names, const vector<LogicalType> &global_types,
+                                        const vector<string> &global_names, const vector<column_t> &global_column_ids,
+                                        MultiFileReaderData &reader_data, const string &initial_file,
+                                        optional_ptr<MultiFileReaderGlobalState> global_state) {
+    D_ASSERT(global_types.size() == global_names.size());
+	D_ASSERT(local_types.size() == local_names.size());
+	// we have expected types: create a map of name -> column index
+	case_insensitive_map_t<idx_t> name_map;
+	for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) {
+		name_map[local_names[col_idx]] = col_idx;
+	}
+	for (idx_t i = 0; i < global_column_ids.size(); i++) {
+		// check if this is a constant column
+		bool constant = false;
+		for (auto &entry : reader_data.constant_map) {
+			if (entry.column_id == i) {
+				constant = true;
+				break;
+			}
+		}
+		if (constant) {
+			// this column is constant for this file
+			continue;
+		}
+		// not constant - look up the column in the name map
+		auto global_id = global_column_ids[i];
+		if (global_id >= global_types.size()) {
+			throw InternalException(
+			    "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file");
+		}
+		auto &global_name = global_names[global_id];
+		auto entry = name_map.find(global_name);
+		if (entry == name_map.end()) {
+			string candidate_names;
+			for (auto &local_name : local_names) {
+				if (!candidate_names.empty()) {
+					candidate_names += ", ";
+				}
+				candidate_names += local_name;
+			}
+			// FIXME: this override is pretty hacky: for missing columns we just insert NULL constants
+		    auto &global_type = global_types[global_id];
+		    Value val (global_type);
+		    reader_data.constant_map.push_back({i, val});
+		    continue;
+		}
+		// we found the column in the local file - check if the types are the same
+		auto local_id = entry->second;
+		D_ASSERT(global_id < global_types.size());
+		D_ASSERT(local_id < local_types.size());
+		auto &global_type = global_types[global_id];
+		auto &local_type = local_types[local_id];
+		if (global_type != local_type) {
+			reader_data.cast_map[local_id] = global_type;
+		}
+		// the types are the same - create the mapping
+		reader_data.column_mapping.push_back(i);
+		reader_data.column_ids.push_back(local_id);
+	}
+
+	reader_data.empty_columns = reader_data.column_ids.empty();
+}
+
 void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
                                         const vector<string> &local_names, const vector<LogicalType> &global_types,
                                         const vector<string> &global_names, const vector<column_t> &global_column_ids,
                                         MultiFileReaderData &reader_data, const string &initial_file,
                                         optional_ptr<MultiFileReaderGlobalState> global_state) {
     // First call the base implementation to do most mapping
-    MultiFileReader::CreateNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, reader_data, initial_file, global_state);
+    CustomMulfiFileNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, reader_data, initial_file, global_state);
 
     // Then we handle delta specific mapping
     D_ASSERT(global_state);
diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test
index 6afeb84..b3ba2d8 100644
--- a/test/sql/dat/all.test
+++ b/test/sql/dat/all.test
@@ -54,12 +54,6 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet')
 ----
 
-
-### FAILING DAT TESTS
-
-# TODO fix all of these
-mode skip
-
 # basic_partitioned
 query I rowsort basic_partitioned
 SELECT *
@@ -71,6 +65,14 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet')
 ----
 
+### FAILING DAT TESTS
+
+# TODO fix all of these
+mode skip
+
+# Fetches path containing`letter=%252F%252520%2525f` from kernel
+# Should be letter= %2F%2520%25f, which means its doubly url encoded
+
 # multi_partitioned
 query I rowsort multi_partitioned
 SELECT *
diff --git a/test/sql/delta_kernel_rs/basic_partitioned.test b/test/sql/delta_kernel_rs/basic_partitioned.test
index 79804d1..d66d012 100644
--- a/test/sql/delta_kernel_rs/basic_partitioned.test
+++ b/test/sql/delta_kernel_rs/basic_partitioned.test
@@ -8,10 +8,12 @@ require delta
 
 require-env DELTA_KERNEL_TESTS_PATH
 
-# FIXME: this fails due some weird error
-mode skip
-
-statement error
+query III
 SELECT * FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned')
 ----
-Failed to read file "/Users/sam/Development/delta-kernel-testing/delta-kernel-rs/kernel/tests/data/basic_partitioned/letter=__HIVE_DEFAULT_PARTITION__
+NULL	6	6.6
+a	4	4.4
+e	5	5.5
+a	1	1.1
+b	2	2.2
+c	3	3.3

From 90455e5f55f4caf682a81156c7ccdae2e47f7471 Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Tue, 25 Jun 2024 10:28:47 +0200
Subject: [PATCH 17/38] working azure setting

---
 .gitignore                          |  4 ++
 extension_config.cmake              |  3 +
 src/functions/delta_scan.cpp        | 87 ++++++++++++++++++++---------
 test/sql/generated/azure.emulator.x | 25 +++++++++
 vcpkg.json                          |  3 +
 5 files changed, 96 insertions(+), 26 deletions(-)
 create mode 100644 test/sql/generated/azure.emulator.x

diff --git a/.gitignore b/.gitignore
index 2cf38b5..31bc287 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,7 @@ testext
 test/python/__pycache__/
 .Rhistory
 data/generated
+__azurite*__.json
+__blobstorage__
+.venv
+.vscode
\ No newline at end of file
diff --git a/extension_config.cmake b/extension_config.cmake
index 46e7a27..6cfa12c 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -9,6 +9,9 @@ duckdb_extension_load(delta
 # Build the httpfs extension to test with s3/http
 duckdb_extension_load(httpfs)
 
+# Build the azure extension to test with azure
+duckdb_extension_load(azure)
+
 # Build the tpch and tpcds extension for testing/benchmarking
 duckdb_extension_load(tpch)
 duckdb_extension_load(tpcds)
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 05b958e..fb3643c 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -18,6 +18,7 @@
 
 #include <string>
 #include <numeric>
+#include <regex>
 
 namespace duckdb {
 
@@ -65,11 +66,23 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
     ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback);
 }
 
+std::string parseFromConnectionString(const std::string& connectionString, const std::string& key) {
+    std::regex pattern(key + "=([^;]+);");
+    std::smatch matches;
+    if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) {
+        // The second match ([1]) contains the access key
+        return matches[1].str();
+    } else {
+        // If no access key is found, return an empty string or handle as needed
+        return "";
+    }
+}
+
 static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) {
     ffi::EngineBuilder* builder;
 
     // For "regular" paths we early out with the default builder config
-    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfss://")) {
+    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfs://") && !StringUtil::StartsWith(path, "abfss://")) {
         auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
         return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
     }
@@ -87,7 +100,7 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         bucket = path.substr(5, end_of_container-5);
         path_in_bucket = path.substr(end_of_container);
         secret_type = "s3";
-    } else if (StringUtil::StartsWith(path, "azure://")) {
+    } else if ((StringUtil::StartsWith(path, "azure://")) || (StringUtil::StartsWith(path, "abfss://"))) {
         auto end_of_container = path.find('/',8);
 
         if(end_of_container == string::npos) {
@@ -105,8 +118,8 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         bucket = path.substr(5, end_of_container-5);
         path_in_bucket = path.substr(end_of_container);
         secret_type = "azure";
-    } else if (StringUtil::StartsWith(path, "abfss://")) {
-        auto end_of_container = path.find('/',8);
+    } else if (StringUtil::StartsWith(path, "abfs://")) {
+        auto end_of_container = path.find('/',7);
 
         if(end_of_container == string::npos) {
             throw IOException("Invalid azure url passed to delta scan: %s", path);
@@ -157,10 +170,12 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
 
     } else if (secret_type == "azure") {
         
+        // azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder
+        // and both require different settings
+
         auto connection_string = kv_secret.TryGetValue("connection_string").ToString();
         auto account_name = kv_secret.TryGetValue("account_name").ToString();
         auto endpoint = kv_secret.TryGetValue("endpoint").ToString();
-        auto credential_chain = kv_secret.TryGetValue("credential_chain").ToString();
         auto client_id = kv_secret.TryGetValue("client_id").ToString();
         auto client_secret = kv_secret.TryGetValue("client_secret").ToString();
         auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString();
@@ -168,40 +183,60 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString();
         auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString();
         auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString();
+        auto chain = kv_secret.TryGetValue("chain").ToString();
+
+        if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); //needed for delta RS builder
+        }
 
-        if (!connection_string.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string));
+        if (!connection_string.empty() && connection_string != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
+            account_name = parseFromConnectionString(connection_string, "AccountName");
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("access_key"), KernelUtils::ToDeltaString(parseFromConnectionString(connection_string, "AccountKey"))); //needed for delta RS builder
         }
-        if (!account_name.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name));
+        if (!account_name.empty() && account_name != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_account_name"), KernelUtils::ToDeltaString(account_name)); //needed for duckdb azure plugin
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder
         }
-        if (!endpoint.empty()) {
+        if (!endpoint.empty() && endpoint != "NULL") {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint));
+        } else {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); //needed? Does that work with dfs files system?
         }
-        if (!credential_chain.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(credential_chain));
+        if (!chain.empty() && chain != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("provider"), KernelUtils::ToDeltaString("credential_chain")); //needed for duckdb azure plugin
+
+            if (chain.find("cli") != std::string::npos) {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); //dont know if that is the right way, but we need to tell delta RS builder to authenticate with azure cli
+            }
+            
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("chain"), KernelUtils::ToDeltaString(chain));  //needed for duckdb azure plugin, dont know if all three are necessary
         }
-        if (!client_id.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id));
+        if (!client_id.empty() && client_id != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); //untested
         }
-        if (!client_secret.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret));
+        if (!client_secret.empty() && client_secret != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); //untested
         }
-        if (!tenant_id.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id));
+        if (!tenant_id.empty() && tenant_id != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); //needed for duckdb azure plugin
         }
-        if (!certificate_path.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path));
+        if (!certificate_path.empty() && certificate_path != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); //untested
         }
-        if (!http_proxy.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy));
+        if (!http_proxy.empty() && http_proxy != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); //untested
         }
-        if (!proxy_user_name.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name));
+        if (!proxy_user_name.empty() && proxy_user_name != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); //untested
         }
-        if (!proxy_password.empty()) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password));
+        if (!proxy_password.empty() && proxy_password != "NULL") {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); //untested
         }
+        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); // needed ?
 
     }
     return builder;
diff --git a/test/sql/generated/azure.emulator.x b/test/sql/generated/azure.emulator.x
new file mode 100644
index 0000000..86b67ec
--- /dev/null
+++ b/test/sql/generated/azure.emulator.x
@@ -0,0 +1,25 @@
+# name: test/sql/generated/azure.emulator
+# description: test delta scan on azure emulator data using secret
+# group: [delta_generated]
+
+require parquet
+
+require httpfs
+
+require azure
+
+require delta
+
+require-env GENERATED_AZURE_DATA_AVAILABLE
+
+statement ok
+CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1')
+
+# Run modified tpch q06 against the remote data
+query I rowsort q1
+SELECT
+    *
+FROM
+    delta_scan('az://test-bucket-ceiveran/delta_testing/lineitem_sf0_01/delta_lake/')
+LIMIT 100
+----
\ No newline at end of file
diff --git a/vcpkg.json b/vcpkg.json
index 85936bf..0cefd94 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,5 +1,8 @@
 {
   "dependencies": [
+    "azure-identity-cpp",
+    "azure-storage-blobs-cpp",
+    "azure-storage-files-datalake-cpp",
     "openssl"
   ]
 }
\ No newline at end of file

From 4688190ce50e0adadee097a46a28aee5b3cdd5cc Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Tue, 25 Jun 2024 14:03:17 +0200
Subject: [PATCH 18/38] load azure functionality from duckdb azure plugin

---
 .gitmodules            | 3 +++
 duckdb                 | 2 +-
 duckdb_azure           | 1 +
 extension-ci-tools     | 2 +-
 extension_config.cmake | 4 +++-
 5 files changed, 9 insertions(+), 3 deletions(-)
 create mode 160000 duckdb_azure

diff --git a/.gitmodules b/.gitmodules
index a55d71e..5131848 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,3 +6,6 @@
 	path = extension-ci-tools
 	url = git@github.com:duckdb/extension-ci-tools.git
 	branch = main
+[submodule "duckdb_azure"]
+	path = duckdb_azure
+	url = https://github.com/duckdb/duckdb_azure.git
diff --git a/duckdb b/duckdb
index 1f98600..7b8efd3 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc
+Subproject commit 7b8efd3d0fab38ec9dae467861a317af3f1d7f3e
diff --git a/duckdb_azure b/duckdb_azure
new file mode 160000
index 0000000..097bb13
--- /dev/null
+++ b/duckdb_azure
@@ -0,0 +1 @@
+Subproject commit 097bb13aadb186ca43ae9b5dc6a21c20e56ad4dd
diff --git a/extension-ci-tools b/extension-ci-tools
index c0cc931..71b8a60 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit c0cc9319492bfa38344c2f28bd35f2304c74cdde
+Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f
diff --git a/extension_config.cmake b/extension_config.cmake
index 6cfa12c..369abd0 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -10,7 +10,9 @@ duckdb_extension_load(delta
 duckdb_extension_load(httpfs)
 
 # Build the azure extension to test with azure
-duckdb_extension_load(azure)
+duckdb_extension_load(azure
+    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/duckdb_azure
+)
 
 # Build the tpch and tpcds extension for testing/benchmarking
 duckdb_extension_load(tpch)

From 49810dac2fb9fbf8ef856a7b58e39c14981fb427 Mon Sep 17 00:00:00 2001
From: Norman Foerster <norman.foerster@fmc-data-solutions.com>
Date: Tue, 25 Jun 2024 14:58:45 +0200
Subject: [PATCH 19/38] added tests

---
 .../{azure.emulator.x => azure.emulator.test} |  0
 .../azure_emulator_with_partitions.test       | 25 +++++++++++++++++++
 2 files changed, 25 insertions(+)
 rename test/sql/generated/{azure.emulator.x => azure.emulator.test} (100%)
 create mode 100644 test/sql/generated/azure_emulator_with_partitions.test

diff --git a/test/sql/generated/azure.emulator.x b/test/sql/generated/azure.emulator.test
similarity index 100%
rename from test/sql/generated/azure.emulator.x
rename to test/sql/generated/azure.emulator.test
diff --git a/test/sql/generated/azure_emulator_with_partitions.test b/test/sql/generated/azure_emulator_with_partitions.test
new file mode 100644
index 0000000..78946b4
--- /dev/null
+++ b/test/sql/generated/azure_emulator_with_partitions.test
@@ -0,0 +1,25 @@
+# name: test/sql/generated/azure.emulator
+# description: test delta scan on azure emulator data using secret
+# group: [delta_generated]
+
+require parquet
+
+require httpfs
+
+require azure
+
+require delta
+
+require-env GENERATED_AZURE_DATA_AVAILABLE
+
+statement ok
+CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1')
+
+# Run modified tpch q06 against the remote data
+query I rowsort q1
+SELECT
+    *
+FROM
+    delta_scan('az://test-bucket-ceiveran/delta_testing/simple_partitioned/delta_lake/')
+where part=1
+----
\ No newline at end of file

From aa0b52b14d459bece34efb305d39ca61a17c7ebc Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 12:10:00 +0200
Subject: [PATCH 20/38] add testing for azure

---
 .github/workflows/CloudTesting.yml            |  80 ++++++++++++
 .github/workflows/LocalTesting.yml            |  66 ++++++++++
 .gitmodules                                   |   5 +-
 duckdb_azure                                  |   1 -
 extension_config.cmake                        |   4 +-
 scripts/upload_test_files_to_azurite.sh       |  21 ++++
 src/functions/delta_scan.cpp                  | 114 +++++++++---------
 test/sql/cloud/azure/cli_auth.test            |  37 ++++++
 .../cloud/azure/hierarchical_namespace.test   |  42 +++++++
 test/sql/cloud/azure/spn_auth.test            |  38 ++++++
 test/sql/cloud/azure/unauthenticated.test     |  47 ++++++++
 test/sql/cloud/azurite/azurite.test           |  31 +++++
 12 files changed, 426 insertions(+), 60 deletions(-)
 create mode 100644 .github/workflows/CloudTesting.yml
 create mode 100644 .github/workflows/LocalTesting.yml
 delete mode 160000 duckdb_azure
 create mode 100755 scripts/upload_test_files_to_azurite.sh
 create mode 100644 test/sql/cloud/azure/cli_auth.test
 create mode 100644 test/sql/cloud/azure/hierarchical_namespace.test
 create mode 100644 test/sql/cloud/azure/spn_auth.test
 create mode 100644 test/sql/cloud/azure/unauthenticated.test
 create mode 100644 test/sql/cloud/azurite/azurite.test

diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml
new file mode 100644
index 0000000..f75a37d
--- /dev/null
+++ b/.github/workflows/CloudTesting.yml
@@ -0,0 +1,80 @@
+name: Cloud functional tests
+on: [push, repository_dispatch]
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
+  cancel-in-progress: true
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  azure-tests-linux:
+    name: Azure tests (Linux)
+    runs-on: ubuntu-latest
+    env:
+      VCPKG_TARGET_TRIPLET: x64-linux
+      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
+      GEN: Ninja
+      DUCKDB_PLATFORM: linux_amd64
+
+    steps:
+      - name: Install required ubuntu packages
+        run: |
+          sudo apt-get update -y -qq
+          sudo apt-get install -y -qq software-properties-common
+          sudo add-apt-repository ppa:git-core/ppa
+          sudo apt-get update -y -qq
+          sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client
+
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          submodules: 'true'
+
+      - name: Setup Ccache
+        uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases
+        with:
+          key: ${{ github.job }}-${{ matrix.duckdb_arch }}
+
+      - name: Setup vcpkg
+        uses: lukka/run-vcpkg@v11.1
+        with:
+          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Build extension
+        env:
+          GEN: ninja
+        run: |
+          make release
+
+      - name: Test with Service Principal (SPN) in env vars
+        env:
+          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
+          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
+          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
+          AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}}
+        run: |
+          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"
+
+      - name: Test with SPN logged in in azure-cli
+        env:
+          AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}}
+          DUCKDB_AZ_CLI_LOGGED_IN: 1
+        run: |
+          az login --service-principal -u ${{secrets.AZURE_CLIENT_ID}} -p ${{secrets.AZURE_CLIENT_SECRET}} --tenant ${{secrets.AZURE_TENANT_ID}}
+          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"
+
+      - name: Log out azure-cli
+        if: always()
+        run: |
+          az logout
+
+      - name: Tests that focus on public non-authenticated requests
+        env:
+          AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}}
+          DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE: 1
+        run: |
+          python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*"
\ No newline at end of file
diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
new file mode 100644
index 0000000..23b31ac
--- /dev/null
+++ b/.github/workflows/LocalTesting.yml
@@ -0,0 +1,66 @@
+name: Local functional tests
+on: [push, pull_request,repository_dispatch]
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
+  cancel-in-progress: true
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  azurite-tests-linux:
+    name: Azurite (local azure test server) tests (Linux)
+    runs-on: ubuntu-latest
+    container: 'quay.io/pypa/manylinux2014_x86_64'
+    env:
+      VCPKG_TARGET_TRIPLET: 'x64-linux'
+      GEN: Ninja
+      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
+      AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
+      AZURE_STORAGE_ACCOUNT: devstoreaccount1
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0
+        submodules: 'true'
+
+    - name: install Azure test service
+      run: |
+        yum install -y nodejs npm
+        npm install -g azurite
+        echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo
+        yum install -y azure-cli
+
+    - name: Setup ManyLinux2014
+      run: |
+        ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias
+
+    - name: Setup vcpkg
+      uses: lukka/run-vcpkg@v11.1
+      with:
+        vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
+
+    # Build extension
+    - name: Build extension
+      env:
+        GEN: ninja
+      run: |
+        make release
+
+    - name: Launch & populate Azure test service
+      run: |
+        azurite > azurite_log.txt 2>&1 &
+        sleep 10
+        ./scripts/upload_test_files_to_azurite.sh
+
+    - name: Test extension
+      run: |
+        make test
+
+    - name: Azure test server log
+      if: always()
+      shell: bash
+      run: |
+        echo "## azurite"
+        cat azurite_log.txt
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index 5131848..cd15846 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -5,7 +5,4 @@
 [submodule "extension-ci-tools"]
 	path = extension-ci-tools
 	url = git@github.com:duckdb/extension-ci-tools.git
-	branch = main
-[submodule "duckdb_azure"]
-	path = duckdb_azure
-	url = https://github.com/duckdb/duckdb_azure.git
+	branch = main
\ No newline at end of file
diff --git a/duckdb_azure b/duckdb_azure
deleted file mode 160000
index 097bb13..0000000
--- a/duckdb_azure
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 097bb13aadb186ca43ae9b5dc6a21c20e56ad4dd
diff --git a/extension_config.cmake b/extension_config.cmake
index 369abd0..16571c2 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -11,7 +11,9 @@ duckdb_extension_load(httpfs)
 
 # Build the azure extension to test with azure
 duckdb_extension_load(azure
-    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/duckdb_azure
+        LOAD_TESTS
+        GIT_URL https://github.com/duckdb/duckdb_azure
+        GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e
 )
 
 # Build the tpch and tpcds extension for testing/benchmarking
diff --git a/scripts/upload_test_files_to_azurite.sh b/scripts/upload_test_files_to_azurite.sh
new file mode 100755
index 0000000..f1ae34e
--- /dev/null
+++ b/scripts/upload_test_files_to_azurite.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Default Azurite connection string (see: https://github.com/Azure/Azurite)
+conn_string="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;"
+
+# Create container
+az storage container create -n delta-testing-private --connection-string "${conn_string}"
+az storage container create -n delta-testing-public  --connection-string "${conn_string}" --public-access blob
+
+copy_file() {
+  local from="${1}"
+  local to="${2}"
+  az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-private" --connection-string "${conn_string}"
+  az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-public"  --connection-string "${conn_string}"
+}
+
+cd ./build/debug/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated
+while read filepath; do
+    remote_filepath=dat/"$(echo "${filepath}" | cut -c 3-)"
+    copy_file "${filepath}" "${remote_filepath}"
+done < <(find . -type f)
\ No newline at end of file
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 40dd143..3929c57 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -66,16 +66,25 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
     ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback);
 }
 
-std::string parseFromConnectionString(const std::string& connectionString, const std::string& key) {
-    std::regex pattern(key + "=([^;]+);");
+string ParseAccountNameFromEndpoint(const string& endpoint) {
+    if (!StringUtil::StartsWith(endpoint, "https://")) {
+        return "";
+    }
+    auto result = endpoint.find('.', 8);
+    if (result == endpoint.npos) {
+        return "";
+    }
+    return endpoint.substr(8,result-8);
+}
+
+string parseFromConnectionString(const string& connectionString, const string& key) {
+    std::regex pattern(key + "=([^;]+)(?=;|$)");
     std::smatch matches;
     if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) {
         // The second match ([1]) contains the access key
         return matches[1].str();
-    } else {
-        // If no access key is found, return an empty string or handle as needed
-        return "";
     }
+    return "";
 }
 
 static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) {
@@ -169,75 +178,72 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));
 
     } else if (secret_type == "azure") {
-        
         // azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder
         // and both require different settings
-
         auto connection_string = kv_secret.TryGetValue("connection_string").ToString();
         auto account_name = kv_secret.TryGetValue("account_name").ToString();
         auto endpoint = kv_secret.TryGetValue("endpoint").ToString();
         auto client_id = kv_secret.TryGetValue("client_id").ToString();
         auto client_secret = kv_secret.TryGetValue("client_secret").ToString();
         auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString();
-        auto certificate_path = kv_secret.TryGetValue("certificate_path").ToString();
-        auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString();
-        auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString();
-        auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString();
         auto chain = kv_secret.TryGetValue("chain").ToString();
+        auto provider = kv_secret.GetProvider();
 
-        if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); //needed for delta RS builder
-        }
+        if (provider == "credential_chain") {
+            // Authentication option 1a: using the cli authentication
+            if (chain.find("cli") != std::string::npos) {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true"));
+            }
+            // Authentication option 1b: non-cli credential chains will just "hope for the best" technically since we are using the default
+            // credential chain provider duckDB and delta-kernel-rs should find the same auth
+        } else if (!connection_string.empty() && connection_string != "NULL") {
 
-        if (!connection_string.empty() && connection_string != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin
+            // Authentication option 2: a connection string based on account key
+            auto account_key = parseFromConnectionString(connection_string, "AccountKey");
             account_name = parseFromConnectionString(connection_string, "AccountName");
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("access_key"), KernelUtils::ToDeltaString(parseFromConnectionString(connection_string, "AccountKey"))); //needed for delta RS builder
+            // Authentication option 2: a connection string based on account key
+            if (!account_name.empty() && !account_key.empty()) {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_key"),
+                                        KernelUtils::ToDeltaString(account_key));
+            } else {
+                // Authentication option 2b: a connection string based on SAS token
+                endpoint = parseFromConnectionString(connection_string, "BlobEndpoint");
+                if (account_name.empty()) {
+                    account_name = ParseAccountNameFromEndpoint(endpoint);
+                }
+                auto sas_token = parseFromConnectionString(connection_string, "SharedAccessSignature");
+                if (!sas_token.empty()) {
+                    ffi::set_builder_option(builder, KernelUtils::ToDeltaString("sas_token"),
+                                            KernelUtils::ToDeltaString(sas_token));
+                }
+            }
+        } else if (provider == "service_principal") {
+            if (!client_id.empty() && client_id != "NULL") {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id));
+            }
+            if (!client_secret.empty() && client_secret != "NULL") {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret));
+            }
+            if (!tenant_id.empty() && tenant_id != "NULL") {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id));
+            }
+        } else {
+            // Authentication option 3: no authentication, just an account name
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_skip_signature"), KernelUtils::ToDeltaString("true"));
+        }
+        // Set the use_emulator option for when the azurite test server is used
+        if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) {
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true"));
         }
         if (!account_name.empty() && account_name != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_account_name"), KernelUtils::ToDeltaString(account_name)); //needed for duckdb azure plugin
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder
         }
         if (!endpoint.empty() && endpoint != "NULL") {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint));
         } else {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); //needed? Does that work with dfs files system?
-        }
-        if (!chain.empty() && chain != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("provider"), KernelUtils::ToDeltaString("credential_chain")); //needed for duckdb azure plugin
-
-            if (chain.find("cli") != std::string::npos) {
-                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); //dont know if that is the right way, but we need to tell delta RS builder to authenticate with azure cli
-            }
-            
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("chain"), KernelUtils::ToDeltaString(chain));  //needed for duckdb azure plugin, dont know if all three are necessary
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/"));
         }
-        if (!client_id.empty() && client_id != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); //untested
-        }
-        if (!client_secret.empty() && client_secret != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); //untested
-        }
-        if (!tenant_id.empty() && tenant_id != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); //needed for duckdb azure plugin
-        }
-        if (!certificate_path.empty() && certificate_path != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); //untested
-        }
-        if (!http_proxy.empty() && http_proxy != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); //untested
-        }
-        if (!proxy_user_name.empty() && proxy_user_name != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); //untested
-        }
-        if (!proxy_password.empty() && proxy_password != "NULL") {
-            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); //untested
-        }
-        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); // needed ?
-
+        ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket));
     }
     return builder;
 }
diff --git a/test/sql/cloud/azure/cli_auth.test b/test/sql/cloud/azure/cli_auth.test
new file mode 100644
index 0000000..fffa36a
--- /dev/null
+++ b/test/sql/cloud/azure/cli_auth.test
@@ -0,0 +1,37 @@
+# name: test/sql/cloud/basic.test
+# description: confirm queried data is correct
+# group: [azure]
+
+require azure
+
+require parquet
+
+require delta
+
+require-env DUCKDB_AZ_CLI_LOGGED_IN
+
+require-env AZURE_STORAGE_ACCOUNT
+
+statement ok
+set allow_persistent_secrets=false
+
+statement ok
+CREATE SECRET az1 (
+    TYPE AZURE,
+    PROVIDER CREDENTIAL_CHAIN,
+    CHAIN 'cli',
+    ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}'
+)
+
+mode output_result
+
+# Run a remote DAT test
+query I rowsort all_primitive_types
+SELECT *
+FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta')
+----
+
+query I rowsort all_primitive_types
+SELECT *
+FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet')
+----
diff --git a/test/sql/cloud/azure/hierarchical_namespace.test b/test/sql/cloud/azure/hierarchical_namespace.test
new file mode 100644
index 0000000..470a325
--- /dev/null
+++ b/test/sql/cloud/azure/hierarchical_namespace.test
@@ -0,0 +1,42 @@
+# name: test/sql/hierarchical_namespace.test
+# description: test azure extension with ADLS GEN2 storage
+# group: [azure]
+
+# Require statement will ensure this test is run with this extension loaded
+require azure
+
+require parquet
+
+require delta
+
+require-env AZURE_TENANT_ID
+
+require-env AZURE_CLIENT_ID
+
+require-env AZURE_CLIENT_SECRET
+
+require-env AZURE_STORAGE_ACCOUNT
+
+statement ok
+set allow_persistent_secrets=false
+
+statement ok
+CREATE SECRET spn (
+    TYPE AZURE,
+    PROVIDER SERVICE_PRINCIPAL,
+    TENANT_ID '${AZURE_TENANT_ID}',
+    CLIENT_ID '${AZURE_CLIENT_ID}',
+    CLIENT_SECRET '${AZURE_CLIENT_SECRET}',
+    ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}'
+);
+
+# Run a remote DAT test on abfss
+query I
+SELECT int32
+FROM delta_scan('abfss://delta-testing-private/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
diff --git a/test/sql/cloud/azure/spn_auth.test b/test/sql/cloud/azure/spn_auth.test
new file mode 100644
index 0000000..11ed035
--- /dev/null
+++ b/test/sql/cloud/azure/spn_auth.test
@@ -0,0 +1,38 @@
+# name: test/sql/cloud/spn_auth.test
+# description: test azure extension with service principal authentication
+# group: [azure]
+
+require azure
+
+require parquet
+
+require delta
+
+require-env AZURE_CLIENT_ID
+
+require-env AZURE_CLIENT_SECRET
+
+require-env AZURE_TENANT_ID
+
+require-env AZURE_STORAGE_ACCOUNT
+
+statement ok
+CREATE SECRET spn (
+    TYPE AZURE,
+    PROVIDER SERVICE_PRINCIPAL,
+    TENANT_ID '${AZURE_TENANT_ID}',
+    CLIENT_ID '${AZURE_CLIENT_ID}',
+    CLIENT_SECRET '${AZURE_CLIENT_SECRET}',
+    ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}'
+);
+
+# Run a remote DAT test
+query I rowsort all_primitive_types
+SELECT *
+FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta')
+----
+
+query I rowsort all_primitive_types
+SELECT *
+FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet')
+----
diff --git a/test/sql/cloud/azure/unauthenticated.test b/test/sql/cloud/azure/unauthenticated.test
new file mode 100644
index 0000000..84c1f5f
--- /dev/null
+++ b/test/sql/cloud/azure/unauthenticated.test
@@ -0,0 +1,47 @@
+# name: test/sql/cloud/unauthenticated.test
+# description: test unauthenticated queries
+# group: [azure]
+
+require azure
+
+require parquet
+
+require delta
+
+require-env DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE
+
+require-env AZURE_STORAGE_ACCOUNT
+
+statement ok
+set allow_persistent_secrets=false
+
+# TODO: this doesn't work yet
+mode skip
+
+query I
+SELECT int32
+FROM delta_scan('azure://${AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+mode unskip
+
+# Using a secret to set the account name, we can omit the fully qualified url
+statement ok
+CREATE SECRET s1 (TYPE AZURE, ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}')
+
+query I
+SELECT int32
+FROM delta_scan('azure://delta-testing-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+
diff --git a/test/sql/cloud/azurite/azurite.test b/test/sql/cloud/azurite/azurite.test
new file mode 100644
index 0000000..169615b
--- /dev/null
+++ b/test/sql/cloud/azurite/azurite.test
@@ -0,0 +1,31 @@
+# name: test/sql/cloud/azurite/azurite.test
+# description: test with azurite test server
+# group: [azure]
+
+# Require statement will ensure this test is run with this extension loaded
+require azure
+
+require parquet
+
+require delta
+
+require-env AZURE_STORAGE_CONNECTION_STRING
+
+# Set connection string from env var
+statement ok
+CREATE SECRET (TYPE AZURE, CONNECTION_STRING '${AZURE_STORAGE_CONNECTION_STRING}');
+
+# We need a connection string to do requests
+foreach prefix azure:// az://
+
+query I
+SELECT int32
+FROM delta_scan('${prefix}delta-testing-private/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+endloop

From 53e6d95751f95349af3efe9b08a169e9f7e09dfa Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 14:30:19 +0200
Subject: [PATCH 21/38] fix string encoding-related dat failures

---
 CMakeLists.txt                   |  2 +-
 src/functions/delta_scan.cpp     | 23 ++++++++++++++++++++++-
 src/include/delta_kernel_ffi.hpp | 11 ++++++++++-
 test/sql/dat/all.test            | 26 ++++++--------------------
 4 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f94ea9..6797b39 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,7 +99,7 @@ ExternalProject_Add(
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
         # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping
         # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix
-        GIT_TAG 6f95fd3bfaaa57698d72f539f8c6a0475a52c4e7
+        GIT_TAG ed2b80b127984481adba8e59879f39b9e5f871d1
         # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
         # through CMake is an error-prone mess
         CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 1065a7e..feae74c 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -25,12 +25,33 @@ static void* allocate_string(const struct ffi::KernelStringSlice slice) {
     return new string(slice.ptr, slice.len);
 }
 
-static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
+string url_decode(string input) {
+    string result;
+    result.reserve(input.size());
+    char ch;
+    replace(input.begin(), input.end(), '+', ' ');
+    for (idx_t i = 0; i < input.length(); i++) {
+        if (int(input[i]) == 37) {
+            unsigned int ii;
+            sscanf(input.substr(i + 1, 2).c_str(), "%x", &ii);
+            ch = static_cast<char>(ii);
+            result += ch;
+            i += 2;
+        } else {
+            result += input[i];
+        }
+    }
+    return result;
+}
+
+static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::Stats *, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
     auto context = (DeltaSnapshot *) engine_context;
     auto path_string =  context->GetPath();
     StringUtil::RTrim(path_string, "/");
     path_string += "/" + KernelUtils::FromDeltaString(path);
 
+    path_string = url_decode(path_string);
+
     // First we append the file to our resolved files
     context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string));
     context->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
index 12833be..de22390 100644
--- a/src/include/delta_kernel_ffi.hpp
+++ b/src/include/delta_kernel_ffi.hpp
@@ -304,9 +304,19 @@ using NullableCvoid = void*;
 /// function is that `kernel_str` is _only_ valid until the return from this function
 using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str);
 
+/// Give engines an easy way to consume stats
+struct Stats {
+  /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the
+  /// `num_records` statistic must be present and accurate, and must equal the number of records
+  /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat
+  /// outdated, i.e. not reflecting deleted rows yet.
+  uint64_t num_records;
+};
+
 using CScanCallback = void(*)(NullableCvoid engine_context,
                               KernelStringSlice path,
                               int64_t size,
+                              const Stats *stats,
                               const DvInfo *dv_info,
                               const CStringMap *partition_map);
 
@@ -324,7 +334,6 @@ struct im_an_unused_struct_that_tricks_msvc_into_compilation {
     ExternResult<Handle<SharedScan>> field10;
 };
 
-
 extern "C" {
 
 /// # Safety
diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test
index b3ba2d8..676047b 100644
--- a/test/sql/dat/all.test
+++ b/test/sql/dat/all.test
@@ -65,14 +65,6 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet')
 ----
 
-### FAILING DAT TESTS
-
-# TODO fix all of these
-mode skip
-
-# Fetches path containing`letter=%252F%252520%2525f` from kernel
-# Should be letter= %2F%2520%25f, which means its doubly url encoded
-
 # multi_partitioned
 query I rowsort multi_partitioned
 SELECT *
@@ -80,18 +72,7 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta'
 ----
 
 query I rowsort multi_partitioned
-SELECT *
-FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet')
-----
-
-# multi_partitioned
-query I rowsort multi_partitioned
-SELECT *
-FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta')
-----
-
-query I rowsort multi_partitioned
-SELECT *
+SELECT letter, date, decode(data) as data, number
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet')
 ----
 
@@ -106,6 +87,11 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet')
 ----
 
+### FAILING DAT TESTS
+
+# TODO fix all of these
+mode skip
+
 # no_replay
 query I rowsort no_replay
 SELECT *

From d33690e61901336f9e586008a1db7f3fdc1f6d6f Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 14:34:00 +0200
Subject: [PATCH 22/38] add rust to local test

---
 .github/workflows/LocalTesting.yml            |  3 +++
 test/sql/generated/azure.emulator.test        | 25 -------------------
 .../azure_emulator_with_partitions.test       | 25 -------------------
 3 files changed, 3 insertions(+), 50 deletions(-)
 delete mode 100644 test/sql/generated/azure.emulator.test
 delete mode 100644 test/sql/generated/azure_emulator_with_partitions.test

diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index 23b31ac..a08f0c3 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -41,6 +41,9 @@ jobs:
       with:
         vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
 
+    - name: Setup Rust
+      uses: dtolnay/rust-toolchain@stable
+
     # Build extension
     - name: Build extension
       env:
diff --git a/test/sql/generated/azure.emulator.test b/test/sql/generated/azure.emulator.test
deleted file mode 100644
index 86b67ec..0000000
--- a/test/sql/generated/azure.emulator.test
+++ /dev/null
@@ -1,25 +0,0 @@
-# name: test/sql/generated/azure.emulator
-# description: test delta scan on azure emulator data using secret
-# group: [delta_generated]
-
-require parquet
-
-require httpfs
-
-require azure
-
-require delta
-
-require-env GENERATED_AZURE_DATA_AVAILABLE
-
-statement ok
-CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1')
-
-# Run modified tpch q06 against the remote data
-query I rowsort q1
-SELECT
-    *
-FROM
-    delta_scan('az://test-bucket-ceiveran/delta_testing/lineitem_sf0_01/delta_lake/')
-LIMIT 100
-----
\ No newline at end of file
diff --git a/test/sql/generated/azure_emulator_with_partitions.test b/test/sql/generated/azure_emulator_with_partitions.test
deleted file mode 100644
index 78946b4..0000000
--- a/test/sql/generated/azure_emulator_with_partitions.test
+++ /dev/null
@@ -1,25 +0,0 @@
-# name: test/sql/generated/azure.emulator
-# description: test delta scan on azure emulator data using secret
-# group: [delta_generated]
-
-require parquet
-
-require httpfs
-
-require azure
-
-require delta
-
-require-env GENERATED_AZURE_DATA_AVAILABLE
-
-statement ok
-CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1')
-
-# Run modified tpch q06 against the remote data
-query I rowsort q1
-SELECT
-    *
-FROM
-    delta_scan('az://test-bucket-ceiveran/delta_testing/simple_partitioned/delta_lake/')
-where part=1
-----
\ No newline at end of file

From 06798f3a9ad8369e8401fbc54dd8328a1addc3b0 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 14:42:32 +0200
Subject: [PATCH 23/38] small refactor

---
 .github/workflows/GeneratedTests.yml | 53 ----------------------------
 .github/workflows/LocalTesting.yml   | 51 +++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 57 deletions(-)
 delete mode 100644 .github/workflows/GeneratedTests.yml

diff --git a/.github/workflows/GeneratedTests.yml b/.github/workflows/GeneratedTests.yml
deleted file mode 100644
index bd106a5..0000000
--- a/.github/workflows/GeneratedTests.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-#
-# This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension
-#
-name: GeneratedTests
-on:
-  push:
-  pull_request:
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
-  cancel-in-progress: true
-
-jobs:
-  generated-tests-linux:
-    name: Generated Tests (Linux)
-    runs-on: ubuntu-latest
-    env:
-      GEN: ninja
-      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - name: Install
-        shell: bash
-        run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build
-
-      - name: Setup Ccache
-        uses: hendrikmuhs/ccache-action@main
-        with:
-          key: ${{ github.job }}
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Setup vcpkg
-        uses: lukka/run-vcpkg@v11.1
-        with:
-          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
-
-      - name: Build
-        shell: bash
-        run: make generate-data
-
-      - name: Test
-        shell: bash
-        run: |
-          GENERATED_DATA_AVAILABLE=1 make test
\ No newline at end of file
diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index a08f0c3..95a7a09 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -36,14 +36,17 @@ jobs:
       run: |
         ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias
 
+    - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
+      if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
+      run: |
+        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+        echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+
     - name: Setup vcpkg
       uses: lukka/run-vcpkg@v11.1
       with:
         vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
 
-    - name: Setup Rust
-      uses: dtolnay/rust-toolchain@stable
-
     # Build extension
     - name: Build extension
       env:
@@ -66,4 +69,44 @@ jobs:
       shell: bash
       run: |
         echo "## azurite"
-        cat azurite_log.txt
\ No newline at end of file
+        cat azurite_log.txt
+
+  generated-tests-linux:
+    name: Generated Tests (Linux)
+    runs-on: ubuntu-latest
+    env:
+      GEN: ninja
+      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          submodules: 'true'
+
+      - name: Install
+        shell: bash
+        run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build
+
+      - name: Setup Ccache
+        uses: hendrikmuhs/ccache-action@main
+        with:
+          key: ${{ github.job }}
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Setup vcpkg
+        uses: lukka/run-vcpkg@v11.1
+        with:
+          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
+
+      - name: Build
+        shell: bash
+        run: make generate-data
+
+      - name: Test
+        shell: bash
+        run: |
+          GENERATED_DATA_AVAILABLE=1 make test
\ No newline at end of file

From 7c296837d93b8c21780d98ba287fe2a1e5361e7c Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 15:33:46 +0200
Subject: [PATCH 24/38] add missing openssl dep

---
 .github/workflows/LocalTesting.yml | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index 95a7a09..d74c7dd 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -34,7 +34,7 @@ jobs:
 
     - name: Setup ManyLinux2014
       run: |
-        ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias
+        ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl
 
     - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
       if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
@@ -47,6 +47,17 @@ jobs:
       with:
         vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
 
+    - name: Handle OpenSSL dependency for rust build
+      run: |
+        echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
+        echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
+        echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV
+
+    - name: Set Openssl dir
+      if: inputs.openssl_path != ''
+      shell: bash
+      run: |
+
     # Build extension
     - name: Build extension
       env:

From 20ad323e144e09d421bc35dd78fec04cdf293974 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 17:38:56 +0200
Subject: [PATCH 25/38] correct openssl path

---
 .github/workflows/LocalTesting.yml | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index d74c7dd..0b2ea85 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -49,15 +49,10 @@ jobs:
 
     - name: Handle OpenSSL dependency for rust build
       run: |
-        echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
-        echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
+        echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV
+        echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV
         echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV
 
-    - name: Set Openssl dir
-      if: inputs.openssl_path != ''
-      shell: bash
-      run: |
-
     # Build extension
     - name: Build extension
       env:

From 9b0b86fd343fc59d3d96dfaa8f1689f4cddc8caa Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Thu, 27 Jun 2024 18:16:28 +0200
Subject: [PATCH 26/38] actually run rust install

---
 .github/workflows/LocalTesting.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index 0b2ea85..ecdc23c 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -37,7 +37,6 @@ jobs:
         ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl
 
     - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
-      if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
       run: |
         curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
         echo "$HOME/.cargo/bin" >> $GITHUB_PATH

From d4454da51b22320bd7b0fa53539e46b95b5e39cf Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Fri, 28 Jun 2024 09:35:27 +0200
Subject: [PATCH 27/38] fix upload script

---
 scripts/upload_test_files_to_azurite.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/upload_test_files_to_azurite.sh b/scripts/upload_test_files_to_azurite.sh
index f1ae34e..f3631ba 100755
--- a/scripts/upload_test_files_to_azurite.sh
+++ b/scripts/upload_test_files_to_azurite.sh
@@ -14,7 +14,7 @@ copy_file() {
   az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-public"  --connection-string "${conn_string}"
 }
 
-cd ./build/debug/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated
+cd ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated &&
 while read filepath; do
     remote_filepath=dat/"$(echo "${filepath}" | cut -c 3-)"
     copy_file "${filepath}" "${remote_filepath}"

From d23079edd39a1afee20e7e7e0c687337a479b79b Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Fri, 31 May 2024 16:09:44 +0200
Subject: [PATCH 28/38] skip test case for windows for now

---
 src/delta_utils.cpp   | 21 ++-------------------
 test/sql/dat/all.test |  3 +++
 2 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp
index 03f6562..f6f8b3d 100644
--- a/src/delta_utils.cpp
+++ b/src/delta_utils.cpp
@@ -218,20 +218,8 @@ static bool CanHandleFilter(TableFilter *filter) {
     }
 }
 
-// Prunes the list of predicates to ones that we can handle
-static unordered_map<string, TableFilter*> PrunePredicates(unordered_map<string, TableFilter*> predicates) {
-    unordered_map<string, TableFilter*> result;
-    for (const auto &predicate : predicates) {
-        if (CanHandleFilter(predicate.second)) {
-            result[predicate.first] = predicate.second;
-        }
-
-    }
-    return result;
-}
-
 uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor* predicate, ffi::KernelExpressionVisitorState* state) {
-    auto filters = predicate->column_filters;
+    auto &filters = predicate->column_filters;
 
     auto it = filters.begin();
     auto end = filters.end();
@@ -244,12 +232,7 @@ uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor* predicate, ffi::Ker
     };
     auto eit = EngineIteratorFromCallable(get_next);
 
-    // TODO: this should be fixed upstream?
-    try {
-        return visit_expression_and(state, &eit);
-    } catch (...) {
-        return ~0;
-    }
+    return visit_expression_and(state, &eit);
 }
 
 uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const ConstantFilter &filter, ffi::KernelExpressionVisitorState* state) {
diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test
index 676047b..c25c646 100644
--- a/test/sql/dat/all.test
+++ b/test/sql/dat/all.test
@@ -76,6 +76,9 @@ SELECT letter, date, decode(data) as data, number
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet')
 ----
 
+# TODO: fix this
+require notwindows
+
 # multi_partitioned_2
 query I rowsort multi_partitioned_2
 SELECT *

From 230dfa387e36870d32e42bd1e0bb72c454cdc7f5 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Tue, 2 Jul 2024 15:28:41 +0200
Subject: [PATCH 29/38] remove workflow override

---
 .../workflows/MainDistributionPipeline.yml    |   3 +-
 .github/workflows/_extension_distribution.yml | 411 ------------------
 2 files changed, 2 insertions(+), 412 deletions(-)
 delete mode 100644 .github/workflows/_extension_distribution.yml

diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index abdc095..c2644d4 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -14,10 +14,11 @@ concurrency:
 jobs:
   duckdb-stable-build:
     name: Build extension binaries
-    uses: ./.github/workflows/_extension_distribution.yml # Overridden due to rust dependency during build
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.0.0
     with:
       duckdb_version: v1.0.0
       extension_name: delta
+      enable_rust: true
       exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools'
 
   duckdb-stable-deploy:
diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml
deleted file mode 100644
index a536982..0000000
--- a/.github/workflows/_extension_distribution.yml
+++ /dev/null
@@ -1,411 +0,0 @@
-# Reusable workflow for extension building
-
-name: Extension distribution
-on:
-  workflow_call:
-    inputs:
-      # The name with which the extension will be built
-      extension_name:
-        required: true
-        type: string
-      # DuckDB version to build against, should in most cases be identical to
-      duckdb_version:
-        required: true
-        type: string
-      # ';' separated list of architectures to exclude, for example: 'linux_amd64;osx_arm64'
-      exclude_archs:
-        required: false
-        type: string
-        default: ""
-      # Postfix added to artifact names. Can be used to guarantee unique names when this workflow is called multiple times
-      artifact_postfix:
-        required: false
-        type: string
-        default: ""
-      # Override the default vcpkg commit used by this version of DuckDB
-      vcpkg_commit:
-        required: false
-        type: string
-        default: "a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6"
-      # Override the default script producing the matrices. Allows specifying custom matrices.
-      matrix_parse_script:
-        required: false
-        type: string
-        default: "./duckdb/scripts/modify_distribution_matrix.py"
-      # Enable building the DuckDB Shell
-      build_duckdb_shell:
-        required: false
-        type: boolean
-        default: true
-
-jobs:
-  generate_matrix:
-    name: Generate matrix
-    runs-on: ubuntu-latest
-    outputs:
-      linux_matrix: ${{ steps.set-matrix-linux.outputs.linux_matrix }}
-      windows_matrix: ${{ steps.set-matrix-windows.outputs.windows_matrix }}
-      osx_matrix: ${{ steps.set-matrix-osx.outputs.osx_matrix }}
-      wasm_matrix: ${{ steps.set-matrix-wasm.outputs.wasm_matrix }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - name: Checkout DuckDB to version
-        run: |
-          cd duckdb
-          git checkout ${{ inputs.duckdb_version }}
-
-      - id: parse-matrices
-        run: |
-          python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os linux --output linux_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
-          python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os osx --output osx_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
-          python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os windows --output windows_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
-          python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os wasm --output wasm_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
-
-      - id: set-matrix-linux
-        run: |
-          linux_matrix="`cat linux_matrix.json`"
-          echo linux_matrix=$linux_matrix >> $GITHUB_OUTPUT
-          echo `cat $GITHUB_OUTPUT`
-
-      - id: set-matrix-osx
-        run: |
-          osx_matrix="`cat osx_matrix.json`"
-          echo osx_matrix=$osx_matrix >> $GITHUB_OUTPUT
-          echo `cat $GITHUB_OUTPUT`
-
-      - id: set-matrix-windows
-        run: |
-          windows_matrix="`cat windows_matrix.json`"
-          echo windows_matrix=$windows_matrix >> $GITHUB_OUTPUT
-          echo `cat $GITHUB_OUTPUT`
-
-      - id: set-matrix-wasm
-        run: |
-          wasm_matrix="`cat wasm_matrix.json`"
-          echo wasm_matrix=$wasm_matrix >> $GITHUB_OUTPUT
-          echo `cat $GITHUB_OUTPUT`
-
-  linux:
-    name: Linux
-    runs-on: ubuntu-latest
-    container: ${{ matrix.container }}
-    needs: generate_matrix
-    if: ${{ needs.generate_matrix.outputs.linux_matrix != '{}' && needs.generate_matrix.outputs.linux_matrix != '' }}
-    strategy:
-      matrix: ${{fromJson(needs.generate_matrix.outputs.linux_matrix)}}
-    env:
-      VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
-      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
-      GEN: Ninja
-      BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }}
-      DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-
-    steps:
-      - name: Install required ubuntu packages
-        if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }}
-        run: |
-          apt-get update -y -qq
-          apt-get install -y -qq software-properties-common
-          add-apt-repository ppa:git-core/ppa
-          apt-get update -y -qq
-          apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client
-
-      - name: Install Git 2.18.5
-        if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }}
-        run: |
-          wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz
-          tar xvf v2.18.5.tar.gz
-          cd git-2.18.5
-          make
-          make prefix=/usr install
-          git --version
-
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - name: Checkout DuckDB to version
-        run: |
-          cd duckdb
-          git checkout ${{ inputs.duckdb_version }}
-
-      - name: Setup ManyLinux2014
-        if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
-        run: |
-          ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl
-
-      - name: Setup Rust
-        if: ${{ matrix.duckdb_arch == 'linux_amd64'}}
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Setup Rust for cross compilation
-        if: ${{ matrix.duckdb_arch == 'linux_arm64'}}
-        uses: dtolnay/rust-toolchain@stable
-        with:
-          targets: aarch64-unknown-linux-gnu
-
-      - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here)
-        if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }}
-        run: |
-          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-
-      - name: Setup Ccache
-        uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases
-        continue-on-error: true
-        with:
-          key: ${{ github.job }}-${{ matrix.duckdb_arch }}
-
-      - name: Setup Ubuntu
-        if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }}
-        uses: ./duckdb/.github/actions/ubuntu_18_setup
-        with:
-          aarch64_cross_compile: ${{ matrix.duckdb_arch == 'linux_arm64' && 1 }}
-
-      - name: Setup vcpkg
-        uses: lukka/run-vcpkg@v11.1
-        with:
-          vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}
-
-      - name: Handle OpenSSL dependency for rust build
-        run: |
-          echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
-          echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV
-          echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV
-
-      - name: Set Openssl dir
-        if: inputs.openssl_path != ''
-        shell: bash
-        run: |
-
-      - name: Build extension
-        env:
-          GEN: ninja
-          CC: ${{ matrix.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-gcc' || '' }}
-          CXX: ${{ matrix.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-g++' || '' }}
-          DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-        run: |
-          make release
-
-      - name: Test extension
-        if: ${{ matrix.duckdb_arch != 'linux_arm64'}}
-        run: |
-          make test
-
-      - name: Error log
-        if: always()
-        run: |
-          echo "ERROR LOG"
-          cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log
-          
-
-      - uses: actions/upload-artifact@v2
-        with:
-          name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
-          path: |
-            build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension
-
-  macos:
-    name: MacOS
-    runs-on: macos-latest
-    needs: generate_matrix
-    if: ${{ needs.generate_matrix.outputs.osx_matrix != '{}' && needs.generate_matrix.outputs.osx_matrix != '' }}
-    strategy:
-      matrix: ${{fromJson(needs.generate_matrix.outputs.osx_matrix)}}
-    env:
-      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
-      VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
-      OSX_BUILD_ARCH: ${{ matrix.osx_build_arch }}
-      GEN: Ninja
-      BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }}
-      DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - name: Install Ninja
-        run: |
-          brew install ninja
-
-      - name: Setup Ccache
-        uses: hendrikmuhs/ccache-action@main
-        continue-on-error: true
-        with:
-          key: ${{ github.job }}-${{ matrix.duckdb_arch }}
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Checkout DuckDB to version
-        run: |
-          cd duckdb
-          git checkout ${{ inputs.duckdb_version }}
-
-      - name: Setup vcpkg
-        uses: lukka/run-vcpkg@v11.1
-        with:
-          vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}
-
-      - name: Install Rust cross compile dependency
-        if: ${{ matrix.osx_build_arch == 'x86_64'}}
-        run: |
-          rustup target add x86_64-apple-darwin
-
-      - name: Build extension
-        shell: bash
-        env:
-          DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-        run: |
-          make release
-
-      - name: Test Extension
-        if: ${{ matrix.osx_build_arch == 'arm64'}}
-        shell: bash
-        run: |
-          make test
-
-      - name: Error log
-        if: always()
-        run: |
-          echo "ERROR LOG"
-          cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log
-
-      - uses: actions/upload-artifact@v2
-        with:
-          name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
-          path: |
-            build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension
-
-  windows:
-    name: Windows
-    runs-on: windows-latest
-    needs: generate_matrix
-    if: ${{ needs.generate_matrix.outputs.windows_matrix != '{}' && needs.generate_matrix.outputs.windows_matrix != '' }}
-    strategy:
-      matrix: ${{fromJson(needs.generate_matrix.outputs.windows_matrix)}}
-    env:
-      GEN: Ninja
-      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
-      VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
-      BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }}
-      DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-      CC: ${{ matrix.duckdb_arch == 'windows_amd64_rtools' && 'gcc' || '' }}
-      CXX: ${{ matrix.duckdb_arch == 'windows_amd64_rtools' && 'g++' || '' }}
-
-    steps:
-      - name: Keep \n line endings
-        shell: bash
-        run: |
-          git config --global core.autocrlf false
-          git config --global core.eol lf
-
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Setup Rust
-        uses: dtolnay/rust-toolchain@stable
-
-      - uses: r-lib/actions/setup-r@v2
-        if: matrix.duckdb_arch == 'windows_amd64_rtools'
-        with:
-          r-version: 'devel'
-          update-rtools: true
-          rtools-version: '42' # linker bug in 43
-
-      - name: Checkout DuckDB to version
-        run: |
-          cd duckdb
-          git checkout ${{ inputs.duckdb_version }}
-
-      - name: Setup Ccache
-        uses: hendrikmuhs/ccache-action@main
-        continue-on-error: true
-        with:
-          key: ${{ github.job }}-${{ matrix.duckdb_arch }}
-
-      - name: Setup vcpkg
-        uses: lukka/run-vcpkg@v11.1
-        with:
-          vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}
-
-      - name: Build & test extension
-        env:
-          VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets"
-          DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-        run: |
-          make test_release
-
-      - name: Error log
-        if: always()
-        run: |
-          cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log
-
-      - uses: actions/upload-artifact@v2
-        with:
-          name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
-          path: |
-            build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension
-
-  wasm:
-    name: DuckDB-Wasm
-    runs-on: ubuntu-latest
-    needs: generate_matrix
-    if: ${{ needs.generate_matrix.outputs.wasm_matrix != '{}' && needs.generate_matrix.outputs.wasm_matrix != '' }}
-    strategy:
-      matrix: ${{fromJson(needs.generate_matrix.outputs.wasm_matrix)}}
-    env:
-      VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
-      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
-      GEN: Ninja
-      DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          submodules: 'true'
-
-      - name: Checkout DuckDB to version
-        run: |
-          cd duckdb
-          git checkout ${{ inputs.duckdb_version }}
-
-      - uses: mymindstorm/setup-emsdk@v13
-        with:
-          version: 'latest'
-
-      - name: Setup vcpkg
-        uses: lukka/run-vcpkg@v11.1
-        with:
-          vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}
-
-      - name: Setup Ccache
-        uses: hendrikmuhs/ccache-action@main
-        continue-on-error: true
-        with:
-          key: ${{ github.job }}-${{ matrix.duckdb_arch }}
-
-      - name: Build Wasm module
-        run: |
-          make ${{ matrix.duckdb_arch }}
-
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
-          path: |
-            build/${{ matrix.duckdb_arch }}/${{ inputs.extension_name }}.duckdb_extension.wasm

From a392ea517720dfe08276d74e32f7e9e78c7585b9 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 3 Jul 2024 11:24:28 +0200
Subject: [PATCH 30/38] Update README.md

add newly supported platforms linux_arm64 and windows_amd64
---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 686c252..e10bd74 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,9 @@ tables, both local and remote.
 
 # Supported platforms
 The supported platforms are:
-- `linux_amd64` and `linux_amd64_gcc4`
+- `linux_amd64` and `linux_amd64_gcc4` and `linux_arm64`
 - `osx_amd64` and `osx_arm64`
+- `windows_amd64`
 
 Support for the [other](https://duckdb.org/docs/extensions/working_with_extensions#platforms) DuckDB platforms is 
 work-in-progress
@@ -68,4 +69,4 @@ To also run the tests on generated data:
 ```shell
 make generate-data
 GENERATED_DATA_AVAILABLE=1 make test
-```
\ No newline at end of file
+```

From 3e33b4967eac2cb4e6725eac52336fe0e319de59 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Fri, 12 Jul 2024 11:05:42 +0200
Subject: [PATCH 31/38] add support for minio, r2, gcs

---
 .github/workflows/LocalTesting.yml          |  87 ++++++++++++++
 extension-ci-tools                          |   2 +-
 extension_config.cmake                      |   7 ++
 scripts/create_minio_credential_file.sh     |  43 +++++++
 scripts/upload_test_files_to_minio.sh       |   4 +
 src/functions/delta_scan.cpp                |  75 +++++++++++-
 test/sql/cloud/minio_local/gcs_r2.test      |  93 +++++++++++++++
 test/sql/cloud/minio_local/minio_local.test | 121 ++++++++++++++++++++
 vcpkg.json                                  |   7 +-
 9 files changed, 432 insertions(+), 7 deletions(-)
 create mode 100755 scripts/create_minio_credential_file.sh
 create mode 100755 scripts/upload_test_files_to_minio.sh
 create mode 100644 test/sql/cloud/minio_local/gcs_r2.test
 create mode 100644 test/sql/cloud/minio_local/minio_local.test

diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml
index ecdc23c..a424ebf 100644
--- a/.github/workflows/LocalTesting.yml
+++ b/.github/workflows/LocalTesting.yml
@@ -18,6 +18,7 @@ jobs:
       VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
       AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
       AZURE_STORAGE_ACCOUNT: devstoreaccount1
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
     steps:
     - uses: actions/checkout@v3
@@ -76,6 +77,92 @@ jobs:
         echo "## azurite"
         cat azurite_log.txt
 
+  minio-tests-linux:
+    name: Minio (local S3 test server) tests (Linux)
+    runs-on: ubuntu-latest
+    env:
+      S3_TEST_SERVER_AVAILABLE: 1
+      GEN: ninja
+      VCPKG_TARGET_TRIPLET: x64-linux
+      VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          submodules: 'true'
+
+      - name: Checkout DuckDB to version
+        if: ${{ matrix.duckdb_version != '<submodule_version>'}}
+        run: |
+          cd duckdb
+          git checkout ${{ matrix.duckdb_version }}
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Install Ninja
+        shell: bash
+        run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build
+
+      - name: Setup Ccache
+        uses: hendrikmuhs/ccache-action@main
+        with:
+          key: ${{ github.job }}
+          save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }}
+
+      - name: Setup vcpkg
+        uses: lukka/run-vcpkg@v11.1
+        with:
+          vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6
+
+      - name: Build
+        shell: bash
+        run: make
+
+      - name: Start S3/HTTP test server
+        shell: bash
+        run: |
+          cd duckdb
+          mkdir data/attach_test
+          touch data/attach_test/attach.db
+          sudo ./scripts/install_s3_test_server.sh
+          source ./scripts/run_s3_test_server.sh
+          sleep 30
+
+      - name: Write AWS credentials file
+        shell: bash
+        run: |
+          ./scripts/create_minio_credential_file.sh
+
+      - name: Copy files to minio
+        shell: bash
+        env:
+          DUCKDB_MINIO_TEST_SERVER_AVAILABLE: 1
+          AWS_ACCESS_KEY_ID: minio_duckdb_user
+          AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password
+          AWS_DEFAULT_REGION: eu-west-1
+          AWS_ENDPOINT: duckdb-minio.com:9000
+        run: |
+          ./scripts/upload_test_files_to_minio.sh
+
+      - name: Test
+        shell: bash
+        run: |
+          make test
+
+      - name: Run Env tests
+        shell: bash
+        env:
+          DUCKDB_MINIO_TEST_SERVER_AVAILABLE: 1
+          AWS_ACCESS_KEY_ID: minio_duckdb_user
+          AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password
+          AWS_DEFAULT_REGION: eu-west-1
+          AWS_ENDPOINT: duckdb-minio.com:9000
+        run: |
+          ./build/release/test/unittest "*/test/sql/cloud/minio_local/*"
+
   generated-tests-linux:
     name: Generated Tests (Linux)
     runs-on: ubuntu-latest
diff --git a/extension-ci-tools b/extension-ci-tools
index 71b8a60..d6d09ae 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f
+Subproject commit d6d09ae94e71ae74d21f71bed5f9057accbb7505
diff --git a/extension_config.cmake b/extension_config.cmake
index 16571c2..b2ba8c0 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -16,6 +16,13 @@ duckdb_extension_load(azure
         GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e
 )
 
+# Build the aws extension to test with credential providers
+duckdb_extension_load(aws
+        LOAD_TESTS
+        GIT_URL https://github.com/duckdb/duckdb_aws
+        GIT_TAG 3d1f5c8d0127ff7aaf127935721b197e5fdd95e5
+)
+
 # Build the tpch and tpcds extension for testing/benchmarking
 duckdb_extension_load(tpch)
 duckdb_extension_load(tpcds)
diff --git a/scripts/create_minio_credential_file.sh b/scripts/create_minio_credential_file.sh
new file mode 100755
index 0000000..c9f88d2
--- /dev/null
+++ b/scripts/create_minio_credential_file.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Warning: overwrites your existing aws credentials file!
+
+# Set the file path for the credentials file
+credentials_file=~/.aws/credentials
+
+# Set the file path for the config file
+config_file=~/.aws/config
+
+# create dir if not already exists
+mkdir -p ~/.aws
+
+# Create the credentials configuration
+credentials_str="[default]
+aws_access_key_id=minio_duckdb_user
+aws_secret_access_key=minio_duckdb_user_password
+
+[minio-testing-2]
+aws_access_key_id=minio_duckdb_user_2
+aws_secret_access_key=minio_duckdb_user_2_password
+
+[minio-testing-invalid]
+aws_access_key_id=minio_duckdb_user_invalid
+aws_secret_access_key=thispasswordiscompletelywrong
+aws_session_token=completelybogussessiontoken
+"
+
+# Write the credentials configuration to the file
+echo "$credentials_str" > "$credentials_file"
+
+# Create the credentials configuration
+config_str="[default]
+region=eu-west-1
+
+[profile minio-testing-2]
+region=eu-west-1
+
+[profile minio-testing-invalid]
+region=the-moon-123
+"
+
+# Write the config to the file
+echo "$config_str" > "$config_file"
\ No newline at end of file
diff --git a/scripts/upload_test_files_to_minio.sh b/scripts/upload_test_files_to_minio.sh
new file mode 100755
index 0000000..c5723c7
--- /dev/null
+++ b/scripts/upload_test_files_to_minio.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+aws s3 cp --endpoint-url http://duckdb-minio.com:9000 --recursive ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated "s3://test-bucket/dat"
+aws s3 cp --endpoint-url http://duckdb-minio.com:9000 --recursive ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated "s3://test-bucket-public/dat"
\ No newline at end of file
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 5dae760..5d50c8b 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -112,7 +112,14 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
     ffi::EngineBuilder* builder;
 
     // For "regular" paths we early out with the default builder config
-    if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfs://") && !StringUtil::StartsWith(path, "abfss://")) {
+    if (!StringUtil::StartsWith(path, "s3://") &&
+        !StringUtil::StartsWith(path, "gcs://") &&
+        !StringUtil::StartsWith(path, "gs://") &&
+        !StringUtil::StartsWith(path, "r2://") &&
+        !StringUtil::StartsWith(path, "azure://") &&
+        !StringUtil::StartsWith(path, "az://") &&
+        !StringUtil::StartsWith(path, "abfs://") &&
+        !StringUtil::StartsWith(path, "abfss://")) {
         auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
         return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
     }
@@ -130,6 +137,33 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         bucket = path.substr(5, end_of_container-5);
         path_in_bucket = path.substr(end_of_container);
         secret_type = "s3";
+    } else if (StringUtil::StartsWith(path, "gcs://")) {
+        auto end_of_container = path.find('/',6);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid gcs url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(6, end_of_container-6);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "gcs";
+    } else if (StringUtil::StartsWith(path, "gs://")) {
+        auto end_of_container = path.find('/',5);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid gcs url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(5, end_of_container-5);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "gcs";
+    } else if (StringUtil::StartsWith(path, "r2://")) {
+        auto end_of_container = path.find('/',5);
+
+        if(end_of_container == string::npos) {
+            throw IOException("Invalid gcs url passed to delta scan: %s", path);
+        }
+        bucket = path.substr(5, end_of_container-5);
+        path_in_bucket = path.substr(end_of_container);
+        secret_type = "r2";
     } else if ((StringUtil::StartsWith(path, "azure://")) || (StringUtil::StartsWith(path, "abfss://"))) {
         auto end_of_container = path.find('/',8);
 
@@ -159,8 +193,18 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         secret_type = "azure";
     }
 
-    auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
-    builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);
+    // We need to substitute DuckDB's usage of s3 and r2 paths because delta kernel needs to just interpret them as s3 protocol servers.
+    string cleaned_path;
+    if (StringUtil::StartsWith(path, "r2://") || StringUtil::StartsWith(path, "gs://") ) {
+        cleaned_path = "s3://" + path.substr(5);
+    } else if (StringUtil::StartsWith(path, "gcs://")) {
+        cleaned_path = "s3://" + path.substr(6);
+    } else {
+        cleaned_path = path;
+    }
+
+    auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(cleaned_path), DuckDBEngineError::AllocateError);
+    builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + cleaned_path);
 
     // For S3 or Azure paths we need to trim the url, set the container, and fetch a potential secret
     auto &secret_manager = SecretManager::Get(context);
@@ -170,18 +214,24 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
 
     // No secret: nothing left to do here!
     if (!secret_match.HasMatch()) {
+        if (StringUtil::StartsWith(path, "r2://") || StringUtil::StartsWith(path, "gs://") || StringUtil::StartsWith(path, "gcs://")) {
+            throw NotImplementedException("Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again.");
+        }
+
         return builder;
     }
     const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_match.secret_entry->secret);
 
-
     // Here you would need to add the logic for setting the builder options for Azure
     // This is just a placeholder and will need to be replaced with the actual logic
-    if (secret_type == "s3") {
+    if (secret_type == "s3" || secret_type == "gcs" || secret_type == "r2") {
         auto key_id = kv_secret.TryGetValue("key_id").ToString();
         auto secret = kv_secret.TryGetValue("secret").ToString();
         auto session_token = kv_secret.TryGetValue("session_token").ToString();
         auto region = kv_secret.TryGetValue("region").ToString();
+        auto endpoint = kv_secret.TryGetValue("endpoint").ToString();
+        auto use_ssl = kv_secret.TryGetValue("use_ssl").ToString();
+        auto url_style = kv_secret.TryGetValue("url_style").ToString();
 
         if (key_id.empty() && secret.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true"));
@@ -196,6 +246,21 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p
         if (!session_token.empty()) {
             ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token));
         }
+        if (!endpoint.empty() && endpoint != "s3.amazonaws.com") {
+            if(!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) {
+                if(use_ssl == "1" || use_ssl == "NULL") {
+                    endpoint = "https://" + endpoint;
+                } else {
+                    endpoint = "http://" + endpoint;
+                }
+            }
+
+            if (StringUtil::StartsWith(endpoint, "http://")) {
+                ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), KernelUtils::ToDeltaString("true"));
+            }
+            ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), KernelUtils::ToDeltaString(endpoint));
+        }
+
         ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region));
 
     } else if (secret_type == "azure") {
diff --git a/test/sql/cloud/minio_local/gcs_r2.test b/test/sql/cloud/minio_local/gcs_r2.test
new file mode 100644
index 0000000..319380c
--- /dev/null
+++ b/test/sql/cloud/minio_local/gcs_r2.test
@@ -0,0 +1,93 @@
+# name: test/sql/cloud/minio_local/gcs_r2.test
+# description: test delta extension with GCS and R2
+# group: [aws]
+
+require httpfs
+
+require parquet
+
+require delta
+
+require aws
+
+require-env DUCKDB_MINIO_TEST_SERVER_AVAILABLE
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+require-env AWS_DEFAULT_REGION
+
+require-env AWS_ENDPOINT
+
+statement ok
+set secret_directory='__TEST_DIR__/minio_local_gcs_env'
+
+statement error
+FROM delta_scan('gcs://test-bucket/dat/all_primitive_types/delta')
+----
+Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again.
+
+statement error
+FROM delta_scan('gs://test-bucket/dat/all_primitive_types/delta')
+----
+Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again.
+
+statement error
+FROM delta_scan('r2://test-bucket/dat/all_primitive_types/delta')
+----
+Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again.
+
+# create a fake gcs secret
+statement ok
+CREATE SECRET (
+    TYPE GCS,
+	KEY_ID '${AWS_ACCESS_KEY_ID}',
+	SECRET '${AWS_SECRET_ACCESS_KEY}',
+	REGION '${AWS_DEFAULT_REGION}',
+	ENDPOINT '${AWS_ENDPOINT}',
+	USE_SSL false 
+)
+
+query I
+SELECT int32
+FROM delta_scan('gcs://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+query I
+SELECT int32
+FROM delta_scan('gs://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+# create a fake r2 secret
+statement ok
+CREATE SECRET s1 (
+    TYPE R2,
+    PROVIDER config,
+    account_id 'some_bogus_account',
+    KEY_ID '${AWS_ACCESS_KEY_ID}',
+	SECRET '${AWS_SECRET_ACCESS_KEY}',
+	REGION '${AWS_DEFAULT_REGION}',
+	ENDPOINT '${AWS_ENDPOINT}',
+	USE_SSL false 
+)
+
+query I
+SELECT int32
+FROM delta_scan('r2://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
diff --git a/test/sql/cloud/minio_local/minio_local.test b/test/sql/cloud/minio_local/minio_local.test
new file mode 100644
index 0000000..28031f7
--- /dev/null
+++ b/test/sql/cloud/minio_local/minio_local.test
@@ -0,0 +1,121 @@
+# name: test/sql/cloud/minio_local/aws_secret_chains_env.test
+# description: test delta extension with a local minio installation
+# group: [aws]
+
+require httpfs
+
+require parquet
+
+require delta
+
+require aws
+
+require-env DUCKDB_MINIO_TEST_SERVER_AVAILABLE
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+require-env AWS_DEFAULT_REGION
+
+require-env AWS_ENDPOINT
+
+statement ok
+set secret_directory='__TEST_DIR__/aws_secret_chains_env'
+
+# Secret with just the endpoint
+statement ok
+CREATE SECRET s1 (
+    TYPE S3,
+    ENDPOINT '${AWS_ENDPOINT}',
+	USE_SSL false
+);
+
+# We need auth for this
+statement error
+SELECT int32
+FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta')
+----
+IO Error
+
+# unauthenticated query is fine!
+query I
+SELECT int32
+FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+statement ok
+DROP SECRET S1;
+
+# Now we create a config secret with credentials
+statement ok
+CREATE SECRET s1 (
+    TYPE S3,
+    PROVIDER config,
+    KEY_ID '${AWS_ACCESS_KEY_ID}',
+    SECRET '${AWS_SECRET_ACCESS_KEY}',
+    REGION '${AWS_DEFAULT_REGION}',
+    ENDPOINT '${AWS_ENDPOINT}',
+    USE_SSL false
+);
+
+# Public bucket now does work
+query I
+SELECT int32
+FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+# Private bucket now does work too
+query I
+SELECT int32
+FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+statement ok
+DROP SECRET S1;
+
+# Now we create a credential chain secret that searches the env vars automatically
+statement ok
+CREATE SECRET s1 (
+    TYPE S3,
+    PROVIDER credential_chain,
+    ENDPOINT '${AWS_ENDPOINT}',
+    USE_SSL false
+);
+
+# Still works!
+query I
+SELECT int32
+FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
+
+# Still works!
+query I
+SELECT int32
+FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta')
+----
+0
+1
+2
+3
+4
diff --git a/vcpkg.json b/vcpkg.json
index 0cefd94..8e8245d 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -3,6 +3,11 @@
     "azure-identity-cpp",
     "azure-storage-blobs-cpp",
     "azure-storage-files-datalake-cpp",
-    "openssl"
+    "openssl",
+    "zlib",
+    {
+      "name": "aws-sdk-cpp",
+      "features": [ "sts" ]
+    }
   ]
 }
\ No newline at end of file

From e0add7b43b2d394e912acbf0ff01ff2a08a63bd8 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Mon, 15 Jul 2024 11:23:24 +0200
Subject: [PATCH 32/38] fix bug with count star and partition values

---
 src/functions/delta_scan.cpp |  3 +++
 test/sql/dat/all.test        | 41 +++++++++++++++++++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index 5d50c8b..e07c391 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -557,6 +557,9 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio
     if (!file_metadata->partition_map.empty()) {
         for (idx_t i = 0; i < global_column_ids.size(); i++) {
             column_t col_id = global_column_ids[i];
+            if (IsRowIdColumnId(col_id)) {
+                continue;
+            }
             auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]);
             if (col_partition_entry != file_metadata->partition_map.end()) {
                 // Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test
index c25c646..f6332da 100644
--- a/test/sql/dat/all.test
+++ b/test/sql/dat/all.test
@@ -26,7 +26,6 @@ query I rowsort nested_types
 SELECT *
 FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/delta')
 ----
-
 query I rowsort nested_types
 SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/expected/latest/**/*.parquet')
@@ -43,6 +42,16 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/latest/**/*.parquet')
 ----
 
+query I rowsort basic_append_count
+SELECT count(*)
+FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta')
+----
+
+query I rowsort basic_append_count
+SELECT count(*)
+FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/latest/**/*.parquet')
+----
+
 # with_schema_change
 query I rowsort with_checkpoint
 SELECT *
@@ -54,6 +63,16 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet')
 ----
 
+query I rowsort with_checkpoint_count
+SELECT count(*)
+FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta')
+----
+
+query I rowsort with_checkpoint_count
+SELECT count(*)
+FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet')
+----
+
 # basic_partitioned
 query I rowsort basic_partitioned
 SELECT *
@@ -65,6 +84,16 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet')
 ----
 
+query I rowsort basic_partitioned_count
+SELECT count(*)
+FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/delta')
+----
+
+query I rowsort basic_partitioned_count
+SELECT count(*)
+FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet')
+----
+
 # multi_partitioned
 query I rowsort multi_partitioned
 SELECT *
@@ -76,6 +105,16 @@ SELECT letter, date, decode(data) as data, number
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet')
 ----
 
+query I rowsort multi_partitioned_count
+SELECT count(*)
+FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta')
+----
+
+query I rowsort multi_partitioned_count
+SELECT count(*)
+FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet')
+----
+
 # TODO: fix this
 require notwindows
 

From 4693e29550ef977bfed5b7593bd94b4e4b813b66 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Fri, 19 Jul 2024 11:17:30 +0200
Subject: [PATCH 33/38] bump kernel to v0.2.0

---
 CMakeLists.txt                       |   2 +-
 src/functions/delta_scan.cpp         |   2 +-
 src/include/delta_kernel_ffi.hpp     | 278 +++++++++++++++++++++------
 src/include/delta_utils.hpp          |  10 +-
 src/include/functions/delta_scan.hpp |   2 +-
 test/sql/dat/all.test                |  13 +-
 6 files changed, 226 insertions(+), 81 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6797b39..4aeaefd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,7 +99,7 @@ ExternalProject_Add(
         GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
         # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping
         # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix
-        GIT_TAG ed2b80b127984481adba8e59879f39b9e5f871d1
+        GIT_TAG v0.2.0
         # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them
         # through CMake is an error-prone mess
         CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env
diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp
index e07c391..d4a30fd 100644
--- a/src/functions/delta_scan.cpp
+++ b/src/functions/delta_scan.cpp
@@ -83,7 +83,7 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel
     context->metadata.back()->partition_map = std::move(constant_map);
 }
 
-  static void visit_data(void *engine_context, ffi::EngineData* engine_data, const struct ffi::KernelBoolSlice selection_vec) {
+  static void visit_data(void *engine_context, ffi::ExclusiveEngineData* engine_data, const struct ffi::KernelBoolSlice selection_vec) {
     ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback);
 }
 
diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
index de22390..15db00d 100644
--- a/src/include/delta_kernel_ffi.hpp
+++ b/src/include/delta_kernel_ffi.hpp
@@ -49,15 +49,9 @@ enum class KernelError {
   InvalidTableLocationError,
   InvalidDecimalError,
   InvalidStructDataError,
+  InternalError,
 };
 
-#if defined(DEFINE_DEFAULT_ENGINE)
-/// Struct to allow binding to the arrow [C Data
-/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and
-/// the schema.
-struct ArrowFFIData;
-#endif
-
 struct CStringMap;
 
 /// this struct can be used by an engine to materialize a selection vector
@@ -71,7 +65,9 @@ struct EngineBuilder;
 /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into
 /// some kernel calls to operate on the data, or can be converted into the raw data as read by the
 /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`]
-struct EngineData;
+struct ExclusiveEngineData;
+
+struct ExclusiveFileReadResultIterator;
 
 struct KernelExpressionVisitorState;
 
@@ -83,16 +79,55 @@ struct SharedScan;
 
 struct SharedScanDataIterator;
 
+struct SharedSchema;
+
 struct SharedSnapshot;
 
+struct StringSliceIterator;
+
 /// Represents an owned slice of boolean values allocated by the kernel. Any time the engine
 /// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible
-/// to free that slice, by calling [super::drop_bool_slice] exactly once.
+/// to free that slice, by calling [super::free_bool_slice] exactly once.
 struct KernelBoolSlice {
   bool *ptr;
   uintptr_t len;
 };
 
+/// Represents an object that crosses the FFI boundary and which outlives the scope that created
+/// it. It can be passed freely between rust code and external code. The
+///
+/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type:
+///
+/// * The true underlying ("target") type the handle represents. For safety reasons, target type
+/// must always be [`Send`].
+///
+/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a
+/// shared handle must always be [`Send`]+[`Sync`].
+///
+/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently.
+///
+/// # Validity
+///
+/// A `Handle` is _valid_ if all of the following hold:
+///
+/// * It was created by a call to [`Handle::from`]
+/// * Not yet dropped by a call to [`Handle::drop_handle`]
+/// * Not yet consumed by a call to [`Handle::into_inner`]
+///
+/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must
+/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel
+/// API call at a time. If thread races are possible, the handle should be protected with a
+/// mutex. Due to Rust [reference
+/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references),
+/// this requirement applies even for API calls that appear to be read-only (because Rust code
+/// always receives the handle as mutable).
+///
+/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can
+/// freely access shared (non-mutable) handles.
+///
+template<typename H>
+using Handle = H*;
+
 /// An error that can be returned to the engine. Engines that wish to associate additional
 /// information can define and use any type that is [pointer
 /// interconvertible](https://en.cppreference.com/w/cpp/language/static_cast#pointer-interconvertible)
@@ -155,40 +190,11 @@ struct KernelStringSlice {
 
 using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg);
 
-/// Represents an object that crosses the FFI boundary and which outlives the scope that created
-/// it. It can be passed freely between rust code and external code. The
-///
-/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type:
-///
-/// * The true underlying ("target") type the handle represents. For safety reasons, target type
-/// must always be [`Send`].
-///
-/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a
-/// shared handle must always be [`Send`]+[`Sync`].
-///
-/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently.
-///
-/// # Validity
-///
-/// A `Handle` is _valid_ if all of the following hold:
-///
-/// * It was created by a call to [`Handle::from`]
-/// * Not yet dropped by a call to [`Handle::drop_handle`]
-/// * Not yet consumed by a call to [`Handle::into_inner`]
-///
-/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must
-/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel
-/// API call at a time. If thread races are possible, the handle should be protected with a
-/// mutex. Due to Rust [reference
-/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references),
-/// this requirement applies even for API calls that appear to be read-only (because Rust code
-/// always receives the handle as mutable).
-///
-/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can
-/// freely access shared (non-mutable) handles.
-///
-template<typename H>
-using Handle = H*;
+using NullableCvoid = void*;
+
+/// Allow engines to allocate strings of their own type. the contract of calling a passed allocate
+/// function is that `kernel_str` is _only_ valid until the return from this function
+using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str);
 
 /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own
 /// representation of a schema from a particular schema within kernel.
@@ -283,6 +289,68 @@ struct EngineIterator {
   const void *(*get_next)(void *data);
 };
 
+struct FileMeta {
+  KernelStringSlice path;
+  int64_t last_modified;
+  uintptr_t size;
+};
+
+/// ABI-compatible struct for ArrowArray from C Data Interface
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
+///
+/// ```
+/// # use arrow_data::ArrayData;
+/// # use arrow_data::ffi::FFI_ArrowArray;
+/// fn export_array(array: &ArrayData) -> FFI_ArrowArray {
+///     FFI_ArrowArray::new(array)
+/// }
+/// ```
+struct FFI_ArrowArray {
+  int64_t length;
+  int64_t null_count;
+  int64_t offset;
+  int64_t n_buffers;
+  int64_t n_children;
+  const void **buffers;
+  FFI_ArrowArray **children;
+  FFI_ArrowArray *dictionary;
+  void (*release)(FFI_ArrowArray *arg1);
+  void *private_data;
+};
+
+/// ABI-compatible struct for `ArrowSchema` from C Data Interface
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
+///
+/// ```
+/// # use arrow_schema::DataType;
+/// # use arrow_schema::ffi::FFI_ArrowSchema;
+/// fn array_schema(data_type: &DataType) -> FFI_ArrowSchema {
+///     FFI_ArrowSchema::try_from(data_type).unwrap()
+/// }
+/// ```
+///
+struct FFI_ArrowSchema {
+  const char *format;
+  const char *name;
+  const char *metadata;
+  int64_t flags;
+  int64_t n_children;
+  FFI_ArrowSchema **children;
+  FFI_ArrowSchema *dictionary;
+  void (*release)(FFI_ArrowSchema *arg1);
+  void *private_data;
+};
+
+#if defined(DEFINE_DEFAULT_ENGINE)
+/// Struct to allow binding to the arrow [C Data
+/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and
+/// the schema.
+struct ArrowFFIData {
+  FFI_ArrowArray array;
+  FFI_ArrowSchema schema;
+};
+#endif
+
 /// A predicate that can be used to skip data when scanning.
 ///
 /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate,
@@ -298,12 +366,6 @@ struct EnginePredicate {
   uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state);
 };
 
-using NullableCvoid = void*;
-
-/// Allow engines to allocate strings of their own type. the contract of calling a passed allocate
-/// function is that `kernel_str` is _only_ valid until the return from this function
-using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str);
-
 /// Give engines an easy way to consume stats
 struct Stats {
   /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the
@@ -339,7 +401,14 @@ extern "C" {
 /// # Safety
 ///
 /// Caller is responsible for passing a valid handle.
-void drop_bool_slice(KernelBoolSlice slice);
+void free_bool_slice(KernelBoolSlice slice);
+
+/// Drop an `ExclusiveEngineData`.
+///
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle as engine_data
+void free_engine_data(Handle<ExclusiveEngineData> engine_data);
 
 #if defined(DEFINE_DEFAULT_ENGINE)
 /// Get a "builder" that can be used to construct an engine. The function
@@ -390,7 +459,7 @@ ExternResult<Handle<SharedExternEngine>> get_sync_engine(AllocateErrorFn allocat
 /// # Safety
 ///
 /// Caller is responsible for passing a valid handle.
-void drop_engine(Handle<SharedExternEngine> engine);
+void free_engine(Handle<SharedExternEngine> engine);
 
 /// Get the latest snapshot from the specified table
 ///
@@ -403,7 +472,7 @@ ExternResult<Handle<SharedSnapshot>> snapshot(KernelStringSlice path,
 /// # Safety
 ///
 /// Caller is responsible for passing a valid handle.
-void drop_snapshot(Handle<SharedSnapshot> snapshot);
+void free_snapshot(Handle<SharedSnapshot> snapshot);
 
 /// Get the version of the specified snapshot
 ///
@@ -412,6 +481,27 @@ void drop_snapshot(Handle<SharedSnapshot> snapshot);
 /// Caller is responsible for passing a valid handle.
 uint64_t version(Handle<SharedSnapshot> snapshot);
 
+/// Get the resolved root of the table. This should be used in any future calls that require
+/// constructing a path
+///
+/// # Safety
+///
+/// Caller is responsible for passing a valid handle.
+NullableCvoid snapshot_table_root(Handle<SharedSnapshot> snapshot, AllocateStringFn allocate_fn);
+
+/// # Safety
+///
+/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by
+/// [kernel_scan_data_free]. The visitor function pointer must be non-null.
+bool string_slice_next(Handle<StringSliceIterator> data,
+                       NullableCvoid engine_context,
+                       void (*engine_visitor)(NullableCvoid engine_context, KernelStringSlice slice));
+
+/// # Safety
+///
+/// Caller is responsible for (at most once) passing a valid pointer to a [`StringSliceIterator`]
+void free_string_slice_data(Handle<StringSliceIterator> data);
+
 /// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the
 /// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works.
 ///
@@ -464,14 +554,49 @@ uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, d
 
 uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value);
 
-/// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants
+/// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The
+/// _engine_ "owns" the data that is passed into the `engine_visitor`, since it is allocated by the
+/// `Engine` being used for log-replay. If the engine wants the kernel to free this data, it _must_
+/// call [`free_engine_data`] on it.
+///
+/// # Safety
+///
+/// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by
+/// [`free_read_result_iter`]. The visitor function pointer must be non-null.
+ExternResult<bool> read_result_next(Handle<ExclusiveFileReadResultIterator> data,
+                                    NullableCvoid engine_context,
+                                    void (*engine_visitor)(NullableCvoid engine_context,
+                                                           Handle<ExclusiveEngineData> engine_data));
+
+/// Free the memory from the passed read result iterator
+/// # Safety
+///
+/// Caller is responsible for (at most once) passing a valid pointer returned by a call to
+/// [`read_parquet_file`].
+void free_read_result_iter(Handle<ExclusiveFileReadResultIterator> data);
+
+/// Use the specified engine's [`delta_kernel::ParquetHandler`] to read the specified file.
+///
+/// # Safety
+/// Caller is responsible for calling with a valid `ExternEngineHandle` and `FileMeta`
+ExternResult<Handle<ExclusiveFileReadResultIterator>> read_parquet_file(Handle<SharedExternEngine> engine,
+                                                                        const FileMeta *file,
+                                                                        Handle<SharedSchema> physical_schema);
+
+/// Get the number of rows in an engine data
+///
+/// # Safety
+/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`
+uintptr_t engine_data_length(Handle<ExclusiveEngineData> *data);
+
+/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants
 /// to use its own engine data format
 ///
 /// # Safety
 ///
-/// `data_handle` must be a valid pointer to a kernel allocated `EngineData`. The Engine must
+/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must
 /// ensure the handle outlives the returned pointer.
-void *get_raw_engine_data(Handle<EngineData> data);
+void *get_raw_engine_data(Handle<ExclusiveEngineData> data);
 
 #if defined(DEFINE_DEFAULT_ENGINE)
 /// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data
@@ -479,16 +604,16 @@ void *get_raw_engine_data(Handle<EngineData> data);
 /// the schema.
 ///
 /// # Safety
-/// data_handle must be a valid EngineData as read by the
+/// data_handle must be a valid ExclusiveEngineData as read by the
 /// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`.
-ExternResult<ArrowFFIData*> get_raw_arrow_data(Handle<EngineData> data,
+ExternResult<ArrowFFIData*> get_raw_arrow_data(Handle<ExclusiveEngineData> data,
                                                Handle<SharedExternEngine> engine);
 #endif
 
 /// Drops a scan.
 /// # Safety
 /// Caller is responsible for passing a [valid][Handle#Validity] scan handle.
-void drop_scan(Handle<SharedScan> scan);
+void free_scan(Handle<SharedScan> scan);
 
 /// Get a [`Scan`] over the table specified by the passed snapshot.
 /// # Safety
@@ -505,10 +630,35 @@ ExternResult<Handle<SharedScan>> scan(Handle<SharedSnapshot> snapshot,
 /// Engine is responsible for providing a valid scan pointer
 Handle<SharedGlobalScanState> get_global_scan_state(Handle<SharedScan> scan);
 
+/// Get the kernel view of the physical read schema that an engine should read from parquet file in
+/// a scan
+///
 /// # Safety
+/// Engine is responsible for providing a valid GlobalScanState pointer
+Handle<SharedSchema> get_global_read_schema(Handle<SharedGlobalScanState> state);
+
+/// Free a global read schema
+///
+/// # Safety
+/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`]
+void free_global_read_schema(Handle<SharedSchema> schema);
+
+/// Get a count of the number of partition columns for this scan
 ///
+/// # Safety
+/// Caller is responsible for passing a valid global scan pointer.
+uintptr_t get_partition_column_count(Handle<SharedGlobalScanState> state);
+
+/// Get an iterator of the list of partition columns for this scan.
+///
+/// # Safety
 /// Caller is responsible for passing a valid global scan pointer.
-void drop_global_scan_state(Handle<SharedGlobalScanState> state);
+Handle<StringSliceIterator> get_partition_columns(Handle<SharedGlobalScanState> state);
+
+/// # Safety
+///
+/// Caller is responsible for passing a valid global scan state pointer.
+void free_global_scan_state(Handle<SharedGlobalScanState> state);
 
 /// Get an iterator over the data needed to perform a scan. This will return a
 /// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual
@@ -523,18 +673,18 @@ ExternResult<Handle<SharedScanDataIterator>> kernel_scan_data_init(Handle<Shared
 /// # Safety
 ///
 /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by
-/// [kernel_scan_data_free]. The visitor function pointer must be non-null.
+/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null.
 ExternResult<bool> kernel_scan_data_next(Handle<SharedScanDataIterator> data,
                                          NullableCvoid engine_context,
                                          void (*engine_visitor)(NullableCvoid engine_context,
-                                                                Handle<EngineData> engine_data,
+                                                                Handle<ExclusiveEngineData> engine_data,
                                                                 KernelBoolSlice selection_vector));
 
 /// # Safety
 ///
 /// Caller is responsible for (at most once) passing a valid pointer returned by a call to
 /// [`kernel_scan_data_init`].
-void kernel_scan_data_free(Handle<SharedScanDataIterator> data);
+void free_kernel_scan_data(Handle<SharedScanDataIterator> data);
 
 /// allow probing into a CStringMap. If the specified key is in the map, kernel will call
 /// allocate_fn with the value associated with the key and return the value returned from that
@@ -559,8 +709,8 @@ ExternResult<KernelBoolSlice> selection_vector_from_dv(const DvInfo *dv_info,
 /// data which provides the data handle and selection vector as each element in the iterator.
 ///
 /// # Safety
-/// engine is responsbile for passing a valid [`EngineData`] and selection vector.
-void visit_scan_data(Handle<EngineData> data,
+/// engine is responsbile for passing a valid [`ExclusiveEngineData`] and selection vector.
+void visit_scan_data(Handle<ExclusiveEngineData> data,
                      KernelBoolSlice selection_vec,
                      NullableCvoid engine_context,
                      CScanCallback callback);
diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp
index 37dc289..9b33c5c 100644
--- a/src/include/delta_utils.hpp
+++ b/src/include/delta_utils.hpp
@@ -102,11 +102,11 @@ struct TemplatedUniqueKernelPointer : public UniqueKernelPointer<KernelType> {
     };
 };
 
-typedef TemplatedUniqueKernelPointer<ffi::SharedSnapshot, ffi::drop_snapshot> KernelSnapshot;
-typedef TemplatedUniqueKernelPointer<ffi::SharedExternEngine, ffi::drop_engine> KernelExternEngine;
-typedef TemplatedUniqueKernelPointer<ffi::SharedScan, ffi::drop_scan> KernelScan;
-typedef TemplatedUniqueKernelPointer<ffi::SharedGlobalScanState, ffi::drop_global_scan_state> KernelGlobalScanState;
-typedef TemplatedUniqueKernelPointer<ffi::SharedScanDataIterator, ffi::kernel_scan_data_free> KernelScanDataIterator;
+typedef TemplatedUniqueKernelPointer<ffi::SharedSnapshot, ffi::free_snapshot> KernelSnapshot;
+typedef TemplatedUniqueKernelPointer<ffi::SharedExternEngine, ffi::free_engine> KernelExternEngine;
+typedef TemplatedUniqueKernelPointer<ffi::SharedScan, ffi::free_scan> KernelScan;
+typedef TemplatedUniqueKernelPointer<ffi::SharedGlobalScanState, ffi::free_global_scan_state> KernelGlobalScanState;
+typedef TemplatedUniqueKernelPointer<ffi::SharedScanDataIterator, ffi::free_kernel_scan_data> KernelScanDataIterator;
 
 struct KernelUtils {
     static ffi::KernelStringSlice ToDeltaString(const string &str);
diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp
index 07c782b..b4c3c76 100644
--- a/src/include/functions/delta_scan.hpp
+++ b/src/include/functions/delta_scan.hpp
@@ -22,7 +22,7 @@ struct DeltaFileMetaData {
 
     ~DeltaFileMetaData() {
         if (selection_vector.ptr) {
-            ffi::drop_bool_slice(selection_vector);
+            ffi::free_bool_slice(selection_vector);
         }
     }
 
diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test
index f6332da..fc7c6c5 100644
--- a/test/sql/dat/all.test
+++ b/test/sql/dat/all.test
@@ -53,22 +53,22 @@ FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/
 ----
 
 # with_schema_change
-query I rowsort with_checkpoint
+query I rowsort with_schema_change
 SELECT *
 FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta')
 ----
 
-query I rowsort with_checkpoint
+query I rowsort with_schema_change
 SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet')
 ----
 
-query I rowsort with_checkpoint_count
+query I rowsort with_schema_change_count
 SELECT count(*)
 FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta')
 ----
 
-query I rowsort with_checkpoint_count
+query I rowsort with_schema_change_count
 SELECT count(*)
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet')
 ----
@@ -129,11 +129,6 @@ SELECT *
 FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet')
 ----
 
-### FAILING DAT TESTS
-
-# TODO fix all of these
-mode skip
-
 # no_replay
 query I rowsort no_replay
 SELECT *

From 7279b4fe01ec9473d12eb4add8682096c7c3c561 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Fri, 19 Jul 2024 15:12:58 +0200
Subject: [PATCH 34/38] add new return type to msvc workaround struct

---
 src/include/delta_kernel_ffi.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp
index 15db00d..cf69a8d 100644
--- a/src/include/delta_kernel_ffi.hpp
+++ b/src/include/delta_kernel_ffi.hpp
@@ -393,7 +393,7 @@ struct im_an_unused_struct_that_tricks_msvc_into_compilation {
     ExternResult<ArrowFFIData*> field7;
     ExternResult<Handle<SharedScanDataIterator>> field8;
     ExternResult<Handle<SharedScan>> field9;
-    ExternResult<Handle<SharedScan>> field10;
+    ExternResult<Handle<ExclusiveFileReadResultIterator>> field10;
 };
 
 extern "C" {

From 34bb7fe1101ec5d7dc45c1a816665025dfbe40e7 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 24 Jul 2024 10:55:13 +0200
Subject: [PATCH 35/38] add basic benchmarking suite

---
 .gitignore                                    |  2 +
 Makefile                                      |  3 +
 benchmark/README.md                           | 27 ++++++++
 benchmark/benchmark.Makefile                  | 65 +++++++++++++++++++
 benchmark/tpcds/sf1-delta/load.sql            | 24 +++++++
 benchmark/tpcds/sf1-delta/q01.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q02.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q03.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q04.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q05.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q06.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q07.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q08.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q09.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q10.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q11.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q12.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q13.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q14.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q15.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q16.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q17.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q18.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q19.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q20.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q21.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q22.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q23.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q24.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q25.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q26.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q27.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q28.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q29.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q30.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q31.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q32.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q33.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q34.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q35.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q36.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q37.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q38.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q39.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q40.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q41.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q42.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q43.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q44.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q45.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q46.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q47.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q48.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q49.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q50.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q51.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q52.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q53.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q54.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q55.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q56.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q57.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q58.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q59.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q60.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q61.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q62.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q63.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q64.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q65.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q66.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q67.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q68.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q69.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q70.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q71.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q72.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q73.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q74.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q75.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q76.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q77.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q78.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q79.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q80.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q81.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q82.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q83.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q84.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q85.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q86.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q87.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q88.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q89.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q90.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q91.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q92.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q93.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q94.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q95.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q96.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q97.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q98.benchmark       |  7 ++
 benchmark/tpcds/sf1-delta/q99.benchmark       |  7 ++
 .../tpcds/sf1-delta/tpcds_sf1.benchmark.in    | 17 +++++
 benchmark/tpcds/sf1-parquet/load.sql          | 24 +++++++
 benchmark/tpcds/sf1-parquet/q01.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q02.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q03.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q04.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q05.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q06.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q07.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q08.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q09.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q10.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q11.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q12.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q13.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q14.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q15.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q16.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q17.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q18.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q19.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q20.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q21.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q22.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q23.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q24.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q25.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q26.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q27.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q28.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q29.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q30.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q31.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q32.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q33.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q34.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q35.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q36.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q37.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q38.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q39.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q40.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q41.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q42.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q43.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q44.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q45.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q46.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q47.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q48.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q49.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q50.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q51.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q52.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q53.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q54.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q55.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q56.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q57.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q58.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q59.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q60.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q61.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q62.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q63.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q64.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q65.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q66.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q67.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q68.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q69.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q70.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q71.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q72.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q73.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q74.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q75.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q76.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q77.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q78.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q79.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q80.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q81.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q82.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q83.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q84.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q85.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q86.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q87.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q88.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q89.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q90.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q91.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q92.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q93.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q94.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q95.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q96.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q97.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q98.benchmark     |  7 ++
 benchmark/tpcds/sf1-parquet/q99.benchmark     |  7 ++
 .../tpcds/sf1-parquet/tpcds_sf1.benchmark.in  | 17 +++++
 benchmark/tpch/sf1-delta-remote/load.sql      |  8 +++
 benchmark/tpch/sf1-delta-remote/q01.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q02.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q03.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q04.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q05.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q06.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q07.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q08.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q09.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q10.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q11.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q12.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q13.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q14.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q15.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q16.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q17.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q18.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q19.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q20.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q21.benchmark |  7 ++
 benchmark/tpch/sf1-delta-remote/q22.benchmark |  7 ++
 .../tpch_sf1_delta.benchmark.in               | 19 ++++++
 benchmark/tpch/sf1-delta/load.sql             |  8 +++
 benchmark/tpch/sf1-delta/q01.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q02.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q03.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q04.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q05.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q06.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q07.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q08.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q09.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q10.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q11.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q12.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q13.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q14.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q15.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q16.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q17.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q18.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q19.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q20.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q21.benchmark        |  7 ++
 benchmark/tpch/sf1-delta/q22.benchmark        |  7 ++
 .../sf1-delta/tpch_sf1_delta.benchmark.in     | 17 +++++
 benchmark/tpch/sf1-parquet-remote/load.sql    |  8 +++
 .../tpch/sf1-parquet-remote/q01.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q02.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q03.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q04.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q05.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q06.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q07.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q08.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q09.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q10.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q11.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q12.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q13.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q14.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q15.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q16.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q17.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q18.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q19.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q20.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q21.benchmark     |  7 ++
 .../tpch/sf1-parquet-remote/q22.benchmark     |  7 ++
 .../tpch_sf1_delta.benchmark.in               | 19 ++++++
 scripts/plot.py                               | 27 ++++++++
 279 files changed, 2133 insertions(+)
 create mode 100644 benchmark/README.md
 create mode 100644 benchmark/benchmark.Makefile
 create mode 100644 benchmark/tpcds/sf1-delta/load.sql
 create mode 100644 benchmark/tpcds/sf1-delta/q01.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q02.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q03.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q04.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q05.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q06.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q07.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q08.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q09.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q10.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q11.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q12.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q13.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q14.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q15.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q16.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q17.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q18.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q19.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q20.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q21.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q22.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q23.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q24.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q25.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q26.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q27.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q28.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q29.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q30.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q31.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q32.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q33.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q34.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q35.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q36.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q37.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q38.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q39.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q40.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q41.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q42.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q43.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q44.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q45.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q46.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q47.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q48.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q49.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q50.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q51.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q52.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q53.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q54.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q55.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q56.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q57.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q58.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q59.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q60.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q61.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q62.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q63.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q64.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q65.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q66.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q67.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q68.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q69.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q70.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q71.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q72.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q73.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q74.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q75.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q76.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q77.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q78.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q79.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q80.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q81.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q82.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q83.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q84.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q85.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q86.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q87.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q88.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q89.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q90.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q91.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q92.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q93.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q94.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q95.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q96.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q97.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q98.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/q99.benchmark
 create mode 100644 benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
 create mode 100644 benchmark/tpcds/sf1-parquet/load.sql
 create mode 100644 benchmark/tpcds/sf1-parquet/q01.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q02.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q03.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q04.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q05.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q06.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q07.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q08.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q09.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q10.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q11.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q12.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q13.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q14.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q15.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q16.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q17.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q18.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q19.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q20.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q21.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q22.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q23.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q24.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q25.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q26.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q27.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q28.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q29.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q30.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q31.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q32.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q33.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q34.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q35.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q36.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q37.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q38.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q39.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q40.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q41.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q42.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q43.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q44.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q45.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q46.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q47.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q48.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q49.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q50.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q51.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q52.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q53.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q54.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q55.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q56.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q57.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q58.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q59.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q60.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q61.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q62.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q63.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q64.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q65.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q66.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q67.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q68.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q69.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q70.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q71.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q72.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q73.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q74.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q75.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q76.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q77.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q78.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q79.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q80.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q81.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q82.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q83.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q84.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q85.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q86.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q87.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q88.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q89.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q90.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q91.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q92.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q93.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q94.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q95.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q96.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q97.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q98.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/q99.benchmark
 create mode 100644 benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
 create mode 100644 benchmark/tpch/sf1-delta-remote/load.sql
 create mode 100644 benchmark/tpch/sf1-delta-remote/q01.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q02.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q03.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q04.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q05.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q06.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q07.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q08.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q09.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q10.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q11.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q12.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q13.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q14.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q15.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q16.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q17.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q18.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q19.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q20.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q21.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/q22.benchmark
 create mode 100644 benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
 create mode 100644 benchmark/tpch/sf1-delta/load.sql
 create mode 100644 benchmark/tpch/sf1-delta/q01.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q02.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q03.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q04.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q05.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q06.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q07.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q08.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q09.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q10.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q11.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q12.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q13.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q14.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q15.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q16.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q17.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q18.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q19.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q20.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q21.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/q22.benchmark
 create mode 100644 benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
 create mode 100644 benchmark/tpch/sf1-parquet-remote/load.sql
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q01.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q02.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q03.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q04.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q05.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q06.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q07.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q08.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q09.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q10.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q11.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q12.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q13.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q14.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q15.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q16.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q17.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q18.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q19.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q20.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q21.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/q22.benchmark
 create mode 100644 benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
 create mode 100644 scripts/plot.py

diff --git a/.gitignore b/.gitignore
index 31bc287..bc1caa6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 build
+benchmark_results
+duckdb_benchmark_data/
 .idea
 cmake-build-debug
 duckdb_unittest_tempdir/
diff --git a/Makefile b/Makefile
index 78144e6..defa0b6 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,9 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t
 test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data
 test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat
 
+# Include the Makefile from the benchmark directory
+include benchmark/benchmark.Makefile
+
 # Include the Makefile from extension-ci-tools
 include extension-ci-tools/makefiles/duckdb_extension.Makefile
 
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..6ff3801
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,27 @@
+# Benchmarking the Delta Extension
+
+## Basics
+A primitive benchmarking suite exists for the Delta extension.
+
+To run the benchmarks, firstly run the build using:
+```shell
+BUILD_BENCHMARK=1 make
+```
+
+Then to run a benchmark, use one of the benchmark Makefile targets prefixed with `bench-run-`:
+```shell
+make bench-run-tpch-sf1
+```
+Now the TPCH benchmark will be run twice, once on parquet files and once on a delta table.
+
+To create a plot from the results run:
+```shell
+make plot
+```
+
+## Configurations options
+Specific benchmarks can be run from a suite using the `BENCHMARK_PATTERN` variable. For example to compare
+only Q01 from TPCH SF1, run:
+```shell
+BENCHMARK_PATTERN=q01.benchmark make bench-run-tpch-sf1
+```
\ No newline at end of file
diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile
new file mode 100644
index 0000000..3b88eb7
--- /dev/null
+++ b/benchmark/benchmark.Makefile
@@ -0,0 +1,65 @@
+# Set this flag during building to enable the benchmark runner
+ifeq (${BUILD_BENCHMARK}, 1)
+	TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1
+endif
+
+ifeq ("${BENCHMARK_PATTERN}", "")
+	BENCHMARK_PATTERN:=.*
+endif
+
+bench-output-dir:
+	mkdir -p benchmark_results
+
+clean_benchmark:
+	rm -rf benchmark_results
+
+plot:
+	python3 scripts/plot.py
+
+
+############### BENCHMARK TARGETS ###############
+
+###
+# TPCH LOCAL
+###
+
+# TPCH SF1 on delta table
+bench-run-tpch-sf1-delta: bench-output-dir
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-delta.csv
+# TPCH SF1 on parquet files
+bench-run-tpch-sf1-parquet: bench-output-dir
+	./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-parquet.csv
+# COMPARES TPCH SF1 on parquet file vs on delta files
+bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet
+
+###
+# TPCH REMOTE
+###
+
+# TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1)
+bench-run-tpch-sf1-remote-delta: bench-output-dir
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-delta.csv
+# TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1)
+bench-run-tpch-sf1-remote-parquet: bench-output-dir
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/${BENCHMARK_PATTERN}'  &> benchmark_results/tpch-sf1-remote-parquet.csv
+# COMPARES TPCH SF1 on parquet file vs on delta files
+bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta
+
+###
+# TPCDS LOCAL
+###
+
+# TPCDS SF1 on delta table
+bench-run-tpcds-sf1-delta: bench-output-dir
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-delta.csv
+# TPCDS SF1 on parquet files
+bench-run-tpcds-sf1-parquet: bench-output-dir
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-parquet.csv
+# COMPARES TPCDS SF1 on parquet file vs on delta files
+bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet
+
+###
+# ALL
+###
+bench-run-all-local: bench-run-tpcds-sf1 bench-run-tpch-sf1
+
diff --git a/benchmark/tpcds/sf1-delta/load.sql b/benchmark/tpcds/sf1-delta/load.sql
new file mode 100644
index 0000000..c482618
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/load.sql
@@ -0,0 +1,24 @@
+create view call_center as from delta_scan('./data/generated/tpcds_sf1/call_center/delta_lake');
+create view catalog_page as from delta_scan('./data/generated/tpcds_sf1/catalog_page/delta_lake');
+create view catalog_returns as from delta_scan('./data/generated/tpcds_sf1/catalog_returns/delta_lake');
+create view catalog_sales as from delta_scan('./data/generated/tpcds_sf1/catalog_sales/delta_lake');
+create view customer as from delta_scan('./data/generated/tpcds_sf1/customer/delta_lake');
+create view customer_demographics as from delta_scan('./data/generated/tpcds_sf1/customer_demographics/delta_lake');
+create view customer_address as from delta_scan('./data/generated/tpcds_sf1/customer_address/delta_lake');
+create view date_dim as from delta_scan('./data/generated/tpcds_sf1/date_dim/delta_lake');
+create view household_demographics as from delta_scan('./data/generated/tpcds_sf1/household_demographics/delta_lake');
+create view inventory as from delta_scan('./data/generated/tpcds_sf1/inventory/delta_lake');
+create view income_band as from delta_scan('./data/generated/tpcds_sf1/income_band/delta_lake');
+create view item as from delta_scan('./data/generated/tpcds_sf1/item/delta_lake');
+create view promotion as from delta_scan('./data/generated/tpcds_sf1/promotion/delta_lake');
+create view reason as from delta_scan('./data/generated/tpcds_sf1/reason/delta_lake');
+create view ship_mode as from delta_scan('./data/generated/tpcds_sf1/ship_mode/delta_lake');
+create view store as from delta_scan('./data/generated/tpcds_sf1/store/delta_lake');
+create view store_returns as from delta_scan('./data/generated/tpcds_sf1/store_returns/delta_lake');
+create view store_sales as from delta_scan('./data/generated/tpcds_sf1/store_sales/delta_lake');
+create view time_dim as from delta_scan('./data/generated/tpcds_sf1/time_dim/delta_lake');
+create view warehouse as from delta_scan('./data/generated/tpcds_sf1/warehouse/delta_lake');
+create view web_page as from delta_scan('./data/generated/tpcds_sf1/web_page/delta_lake');
+create view web_returns as from delta_scan('./data/generated/tpcds_sf1/web_returns/delta_lake');
+create view web_sales as from delta_scan('./data/generated/tpcds_sf1/web_sales/delta_lake');
+create view web_site as from delta_scan('./data/generated/tpcds_sf1/web_site/delta_lake');
\ No newline at end of file
diff --git a/benchmark/tpcds/sf1-delta/q01.benchmark b/benchmark/tpcds/sf1-delta/q01.benchmark
new file mode 100644
index 0000000..4afa256
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q01.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q01.benchmark
+# description: Run query 01 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=1
+QUERY_NUMBER_PADDED=01
diff --git a/benchmark/tpcds/sf1-delta/q02.benchmark b/benchmark/tpcds/sf1-delta/q02.benchmark
new file mode 100644
index 0000000..260f65a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q02.benchmark
+# description: Run query 02 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/benchmark/tpcds/sf1-delta/q03.benchmark b/benchmark/tpcds/sf1-delta/q03.benchmark
new file mode 100644
index 0000000..06c4bf0
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q03.benchmark
+# description: Run query 03 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/benchmark/tpcds/sf1-delta/q04.benchmark b/benchmark/tpcds/sf1-delta/q04.benchmark
new file mode 100644
index 0000000..ab13a2a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q04.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q04.benchmark
+# description: Run query 04 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=4
+QUERY_NUMBER_PADDED=04
diff --git a/benchmark/tpcds/sf1-delta/q05.benchmark b/benchmark/tpcds/sf1-delta/q05.benchmark
new file mode 100644
index 0000000..583f4bc
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q05.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q05.benchmark
+# description: Run query 05 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=5
+QUERY_NUMBER_PADDED=05
diff --git a/benchmark/tpcds/sf1-delta/q06.benchmark b/benchmark/tpcds/sf1-delta/q06.benchmark
new file mode 100644
index 0000000..17a5d62
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q06.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q06.benchmark
+# description: Run query 06 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=6
+QUERY_NUMBER_PADDED=06
diff --git a/benchmark/tpcds/sf1-delta/q07.benchmark b/benchmark/tpcds/sf1-delta/q07.benchmark
new file mode 100644
index 0000000..aa54538
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q07.benchmark
+# description: Run query 07 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/benchmark/tpcds/sf1-delta/q08.benchmark b/benchmark/tpcds/sf1-delta/q08.benchmark
new file mode 100644
index 0000000..655ac7f
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q08.benchmark
+# description: Run query 08 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/benchmark/tpcds/sf1-delta/q09.benchmark b/benchmark/tpcds/sf1-delta/q09.benchmark
new file mode 100644
index 0000000..c0bef34
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q09.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q09.benchmark
+# description: Run query 09 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=9
+QUERY_NUMBER_PADDED=09
diff --git a/benchmark/tpcds/sf1-delta/q10.benchmark b/benchmark/tpcds/sf1-delta/q10.benchmark
new file mode 100644
index 0000000..c99f19e
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q10.benchmark
+# description: Run query 10 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/benchmark/tpcds/sf1-delta/q11.benchmark b/benchmark/tpcds/sf1-delta/q11.benchmark
new file mode 100644
index 0000000..bddc394
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q11.benchmark
+# description: Run query 11 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/benchmark/tpcds/sf1-delta/q12.benchmark b/benchmark/tpcds/sf1-delta/q12.benchmark
new file mode 100644
index 0000000..475d6c8
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q12.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q12.benchmark
+# description: Run query 12 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=12
+QUERY_NUMBER_PADDED=12
diff --git a/benchmark/tpcds/sf1-delta/q13.benchmark b/benchmark/tpcds/sf1-delta/q13.benchmark
new file mode 100644
index 0000000..a6915d0
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q13.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q13.benchmark
+# description: Run query 13 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=13
+QUERY_NUMBER_PADDED=13
diff --git a/benchmark/tpcds/sf1-delta/q14.benchmark b/benchmark/tpcds/sf1-delta/q14.benchmark
new file mode 100644
index 0000000..eec9b70
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q14.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q14.benchmark
+# description: Run query 14 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=14
+QUERY_NUMBER_PADDED=14
diff --git a/benchmark/tpcds/sf1-delta/q15.benchmark b/benchmark/tpcds/sf1-delta/q15.benchmark
new file mode 100644
index 0000000..9f44d26
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q15.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q15.benchmark
+# description: Run query 15 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=15
+QUERY_NUMBER_PADDED=15
diff --git a/benchmark/tpcds/sf1-delta/q16.benchmark b/benchmark/tpcds/sf1-delta/q16.benchmark
new file mode 100644
index 0000000..b76f510
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q16.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q16.benchmark
+# description: Run query 16 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=16
+QUERY_NUMBER_PADDED=16
diff --git a/benchmark/tpcds/sf1-delta/q17.benchmark b/benchmark/tpcds/sf1-delta/q17.benchmark
new file mode 100644
index 0000000..46fa716
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q17.benchmark
+# description: Run query 17 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/benchmark/tpcds/sf1-delta/q18.benchmark b/benchmark/tpcds/sf1-delta/q18.benchmark
new file mode 100644
index 0000000..e7cebaa
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q18.benchmark
+# description: Run query 18 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/benchmark/tpcds/sf1-delta/q19.benchmark b/benchmark/tpcds/sf1-delta/q19.benchmark
new file mode 100644
index 0000000..88d96c6
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q19.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q19.benchmark
+# description: Run query 19 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=19
+QUERY_NUMBER_PADDED=19
diff --git a/benchmark/tpcds/sf1-delta/q20.benchmark b/benchmark/tpcds/sf1-delta/q20.benchmark
new file mode 100644
index 0000000..aa01979
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q20.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q20.benchmark
+# description: Run query 20 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=20
+QUERY_NUMBER_PADDED=20
diff --git a/benchmark/tpcds/sf1-delta/q21.benchmark b/benchmark/tpcds/sf1-delta/q21.benchmark
new file mode 100644
index 0000000..dbe5a17
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q21.benchmark
+# description: Run query 21 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/benchmark/tpcds/sf1-delta/q22.benchmark b/benchmark/tpcds/sf1-delta/q22.benchmark
new file mode 100644
index 0000000..486db66
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q22.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q22.benchmark
+# description: Run query 22 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=22
+QUERY_NUMBER_PADDED=22
diff --git a/benchmark/tpcds/sf1-delta/q23.benchmark b/benchmark/tpcds/sf1-delta/q23.benchmark
new file mode 100644
index 0000000..695da9a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q23.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q23.benchmark
+# description: Run query 23 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=23
+QUERY_NUMBER_PADDED=23
diff --git a/benchmark/tpcds/sf1-delta/q24.benchmark b/benchmark/tpcds/sf1-delta/q24.benchmark
new file mode 100644
index 0000000..e12cffc
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q24.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q24.benchmark
+# description: Run query 24 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=24
+QUERY_NUMBER_PADDED=24
diff --git a/benchmark/tpcds/sf1-delta/q25.benchmark b/benchmark/tpcds/sf1-delta/q25.benchmark
new file mode 100644
index 0000000..988426f
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q25.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q25.benchmark
+# description: Run query 25 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=25
+QUERY_NUMBER_PADDED=25
diff --git a/benchmark/tpcds/sf1-delta/q26.benchmark b/benchmark/tpcds/sf1-delta/q26.benchmark
new file mode 100644
index 0000000..a475e11
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q26.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q26.benchmark
+# description: Run query 26 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=26
+QUERY_NUMBER_PADDED=26
diff --git a/benchmark/tpcds/sf1-delta/q27.benchmark b/benchmark/tpcds/sf1-delta/q27.benchmark
new file mode 100644
index 0000000..3474886
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q27.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q27.benchmark
+# description: Run query 27 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=27
+QUERY_NUMBER_PADDED=27
diff --git a/benchmark/tpcds/sf1-delta/q28.benchmark b/benchmark/tpcds/sf1-delta/q28.benchmark
new file mode 100644
index 0000000..9dea5b9
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q28.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q28.benchmark
+# description: Run query 28 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=28
+QUERY_NUMBER_PADDED=28
diff --git a/benchmark/tpcds/sf1-delta/q29.benchmark b/benchmark/tpcds/sf1-delta/q29.benchmark
new file mode 100644
index 0000000..22247ea
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q29.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q29.benchmark
+# description: Run query 29 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=29
+QUERY_NUMBER_PADDED=29
diff --git a/benchmark/tpcds/sf1-delta/q30.benchmark b/benchmark/tpcds/sf1-delta/q30.benchmark
new file mode 100644
index 0000000..b41d4ad
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q30.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q30.benchmark
+# description: Run query 30 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=30
+QUERY_NUMBER_PADDED=30
diff --git a/benchmark/tpcds/sf1-delta/q31.benchmark b/benchmark/tpcds/sf1-delta/q31.benchmark
new file mode 100644
index 0000000..92a0306
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q31.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q31.benchmark
+# description: Run query 31 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=31
+QUERY_NUMBER_PADDED=31
diff --git a/benchmark/tpcds/sf1-delta/q32.benchmark b/benchmark/tpcds/sf1-delta/q32.benchmark
new file mode 100644
index 0000000..57ebee9
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q32.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q32.benchmark
+# description: Run query 32 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=32
+QUERY_NUMBER_PADDED=32
diff --git a/benchmark/tpcds/sf1-delta/q33.benchmark b/benchmark/tpcds/sf1-delta/q33.benchmark
new file mode 100644
index 0000000..32c0479
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q33.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q33.benchmark
+# description: Run query 33 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=33
+QUERY_NUMBER_PADDED=33
diff --git a/benchmark/tpcds/sf1-delta/q34.benchmark b/benchmark/tpcds/sf1-delta/q34.benchmark
new file mode 100644
index 0000000..0a034c4
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q34.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q34.benchmark
+# description: Run query 34 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=34
+QUERY_NUMBER_PADDED=34
diff --git a/benchmark/tpcds/sf1-delta/q35.benchmark b/benchmark/tpcds/sf1-delta/q35.benchmark
new file mode 100644
index 0000000..7ee5bb2
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q35.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q35.benchmark
+# description: Run query 35 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=35
+QUERY_NUMBER_PADDED=35
diff --git a/benchmark/tpcds/sf1-delta/q36.benchmark b/benchmark/tpcds/sf1-delta/q36.benchmark
new file mode 100644
index 0000000..f7b3fc0
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q36.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q36.benchmark
+# description: Run query 36 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=36
+QUERY_NUMBER_PADDED=36
diff --git a/benchmark/tpcds/sf1-delta/q37.benchmark b/benchmark/tpcds/sf1-delta/q37.benchmark
new file mode 100644
index 0000000..e482a3d
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q37.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q37.benchmark
+# description: Run query 37 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=37
+QUERY_NUMBER_PADDED=37
diff --git a/benchmark/tpcds/sf1-delta/q38.benchmark b/benchmark/tpcds/sf1-delta/q38.benchmark
new file mode 100644
index 0000000..120d4d1
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q38.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q38.benchmark
+# description: Run query 38 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=38
+QUERY_NUMBER_PADDED=38
diff --git a/benchmark/tpcds/sf1-delta/q39.benchmark b/benchmark/tpcds/sf1-delta/q39.benchmark
new file mode 100644
index 0000000..9216a08
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q39.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q39.benchmark
+# description: Run query 39 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=39
+QUERY_NUMBER_PADDED=39
diff --git a/benchmark/tpcds/sf1-delta/q40.benchmark b/benchmark/tpcds/sf1-delta/q40.benchmark
new file mode 100644
index 0000000..7b78045
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q40.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q40.benchmark
+# description: Run query 40 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=40
+QUERY_NUMBER_PADDED=40
diff --git a/benchmark/tpcds/sf1-delta/q41.benchmark b/benchmark/tpcds/sf1-delta/q41.benchmark
new file mode 100644
index 0000000..03c041f
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q41.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q41.benchmark
+# description: Run query 41 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=41
+QUERY_NUMBER_PADDED=41
diff --git a/benchmark/tpcds/sf1-delta/q42.benchmark b/benchmark/tpcds/sf1-delta/q42.benchmark
new file mode 100644
index 0000000..80eabea
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q42.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q42.benchmark
+# description: Run query 42 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=42
+QUERY_NUMBER_PADDED=42
diff --git a/benchmark/tpcds/sf1-delta/q43.benchmark b/benchmark/tpcds/sf1-delta/q43.benchmark
new file mode 100644
index 0000000..2b0e27a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q43.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q43.benchmark
+# description: Run query 43 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=43
+QUERY_NUMBER_PADDED=43
diff --git a/benchmark/tpcds/sf1-delta/q44.benchmark b/benchmark/tpcds/sf1-delta/q44.benchmark
new file mode 100644
index 0000000..6e2571a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q44.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q44.benchmark
+# description: Run query 44 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=44
+QUERY_NUMBER_PADDED=44
diff --git a/benchmark/tpcds/sf1-delta/q45.benchmark b/benchmark/tpcds/sf1-delta/q45.benchmark
new file mode 100644
index 0000000..bc733aa
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q45.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q45.benchmark
+# description: Run query 45 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=45
+QUERY_NUMBER_PADDED=45
diff --git a/benchmark/tpcds/sf1-delta/q46.benchmark b/benchmark/tpcds/sf1-delta/q46.benchmark
new file mode 100644
index 0000000..338d6c2
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q46.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q46.benchmark
+# description: Run query 46 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=46
+QUERY_NUMBER_PADDED=46
diff --git a/benchmark/tpcds/sf1-delta/q47.benchmark b/benchmark/tpcds/sf1-delta/q47.benchmark
new file mode 100644
index 0000000..0031c36
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q47.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q47.benchmark
+# description: Run query 47 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=47
+QUERY_NUMBER_PADDED=47
diff --git a/benchmark/tpcds/sf1-delta/q48.benchmark b/benchmark/tpcds/sf1-delta/q48.benchmark
new file mode 100644
index 0000000..dba4376
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q48.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q48.benchmark
+# description: Run query 48 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=48
+QUERY_NUMBER_PADDED=48
diff --git a/benchmark/tpcds/sf1-delta/q49.benchmark b/benchmark/tpcds/sf1-delta/q49.benchmark
new file mode 100644
index 0000000..e87d59d
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q49.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q49.benchmark
+# description: Run query 49 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=49
+QUERY_NUMBER_PADDED=49
diff --git a/benchmark/tpcds/sf1-delta/q50.benchmark b/benchmark/tpcds/sf1-delta/q50.benchmark
new file mode 100644
index 0000000..994ca01
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q50.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q50.benchmark
+# description: Run query 50 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=50
+QUERY_NUMBER_PADDED=50
diff --git a/benchmark/tpcds/sf1-delta/q51.benchmark b/benchmark/tpcds/sf1-delta/q51.benchmark
new file mode 100644
index 0000000..f684809
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q51.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q51.benchmark
+# description: Run query 51 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=51
+QUERY_NUMBER_PADDED=51
diff --git a/benchmark/tpcds/sf1-delta/q52.benchmark b/benchmark/tpcds/sf1-delta/q52.benchmark
new file mode 100644
index 0000000..d10ac68
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q52.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q52.benchmark
+# description: Run query 52 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=52
+QUERY_NUMBER_PADDED=52
diff --git a/benchmark/tpcds/sf1-delta/q53.benchmark b/benchmark/tpcds/sf1-delta/q53.benchmark
new file mode 100644
index 0000000..209bb6c
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q53.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q53.benchmark
+# description: Run query 53 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=53
+QUERY_NUMBER_PADDED=53
diff --git a/benchmark/tpcds/sf1-delta/q54.benchmark b/benchmark/tpcds/sf1-delta/q54.benchmark
new file mode 100644
index 0000000..bac8abd
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q54.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q54.benchmark
+# description: Run query 54 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=54
+QUERY_NUMBER_PADDED=54
diff --git a/benchmark/tpcds/sf1-delta/q55.benchmark b/benchmark/tpcds/sf1-delta/q55.benchmark
new file mode 100644
index 0000000..cf9cfde
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q55.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q55.benchmark
+# description: Run query 55 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=55
+QUERY_NUMBER_PADDED=55
diff --git a/benchmark/tpcds/sf1-delta/q56.benchmark b/benchmark/tpcds/sf1-delta/q56.benchmark
new file mode 100644
index 0000000..3658451
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q56.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q56.benchmark
+# description: Run query 56 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=56
+QUERY_NUMBER_PADDED=56
diff --git a/benchmark/tpcds/sf1-delta/q57.benchmark b/benchmark/tpcds/sf1-delta/q57.benchmark
new file mode 100644
index 0000000..6730b17
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q57.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q57.benchmark
+# description: Run query 57 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=57
+QUERY_NUMBER_PADDED=57
diff --git a/benchmark/tpcds/sf1-delta/q58.benchmark b/benchmark/tpcds/sf1-delta/q58.benchmark
new file mode 100644
index 0000000..e462efb
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q58.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q58.benchmark
+# description: Run query 58 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=58
+QUERY_NUMBER_PADDED=58
diff --git a/benchmark/tpcds/sf1-delta/q59.benchmark b/benchmark/tpcds/sf1-delta/q59.benchmark
new file mode 100644
index 0000000..5281155
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q59.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q59.benchmark
+# description: Run query 59 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=59
+QUERY_NUMBER_PADDED=59
diff --git a/benchmark/tpcds/sf1-delta/q60.benchmark b/benchmark/tpcds/sf1-delta/q60.benchmark
new file mode 100644
index 0000000..71be7db
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q60.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q60.benchmark
+# description: Run query 60 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=60
+QUERY_NUMBER_PADDED=60
diff --git a/benchmark/tpcds/sf1-delta/q61.benchmark b/benchmark/tpcds/sf1-delta/q61.benchmark
new file mode 100644
index 0000000..95506eb
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q61.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q61.benchmark
+# description: Run query 61 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=61
+QUERY_NUMBER_PADDED=61
diff --git a/benchmark/tpcds/sf1-delta/q62.benchmark b/benchmark/tpcds/sf1-delta/q62.benchmark
new file mode 100644
index 0000000..349e4e9
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q62.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q62.benchmark
+# description: Run query 62 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=62
+QUERY_NUMBER_PADDED=62
diff --git a/benchmark/tpcds/sf1-delta/q63.benchmark b/benchmark/tpcds/sf1-delta/q63.benchmark
new file mode 100644
index 0000000..6a03287
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q63.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q63.benchmark
+# description: Run query 63 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=63
+QUERY_NUMBER_PADDED=63
diff --git a/benchmark/tpcds/sf1-delta/q64.benchmark b/benchmark/tpcds/sf1-delta/q64.benchmark
new file mode 100644
index 0000000..3c3c968
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q64.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q64.benchmark
+# description: Run query 64 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=64
+QUERY_NUMBER_PADDED=64
diff --git a/benchmark/tpcds/sf1-delta/q65.benchmark b/benchmark/tpcds/sf1-delta/q65.benchmark
new file mode 100644
index 0000000..8e4d181
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q65.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q65.benchmark
+# description: Run query 65 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=65
+QUERY_NUMBER_PADDED=65
diff --git a/benchmark/tpcds/sf1-delta/q66.benchmark b/benchmark/tpcds/sf1-delta/q66.benchmark
new file mode 100644
index 0000000..c0130f2
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q66.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q66.benchmark
+# description: Run query 66 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=66
+QUERY_NUMBER_PADDED=66
diff --git a/benchmark/tpcds/sf1-delta/q67.benchmark b/benchmark/tpcds/sf1-delta/q67.benchmark
new file mode 100644
index 0000000..3aa7a26
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q67.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q67.benchmark
+# description: Run query 67 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=67
+QUERY_NUMBER_PADDED=67
diff --git a/benchmark/tpcds/sf1-delta/q68.benchmark b/benchmark/tpcds/sf1-delta/q68.benchmark
new file mode 100644
index 0000000..faf1c29
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q68.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q68.benchmark
+# description: Run query 68 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=68
+QUERY_NUMBER_PADDED=68
diff --git a/benchmark/tpcds/sf1-delta/q69.benchmark b/benchmark/tpcds/sf1-delta/q69.benchmark
new file mode 100644
index 0000000..bd36138
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q69.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q69.benchmark
+# description: Run query 69 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=69
+QUERY_NUMBER_PADDED=69
diff --git a/benchmark/tpcds/sf1-delta/q70.benchmark b/benchmark/tpcds/sf1-delta/q70.benchmark
new file mode 100644
index 0000000..f5a4e89
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q70.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q70.benchmark
+# description: Run query 70 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=70
+QUERY_NUMBER_PADDED=70
diff --git a/benchmark/tpcds/sf1-delta/q71.benchmark b/benchmark/tpcds/sf1-delta/q71.benchmark
new file mode 100644
index 0000000..9eab10a
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q71.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q71.benchmark
+# description: Run query 71 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=71
+QUERY_NUMBER_PADDED=71
diff --git a/benchmark/tpcds/sf1-delta/q72.benchmark b/benchmark/tpcds/sf1-delta/q72.benchmark
new file mode 100644
index 0000000..e07b4a0
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q72.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q72.benchmark
+# description: Run query 72 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=72
+QUERY_NUMBER_PADDED=72
diff --git a/benchmark/tpcds/sf1-delta/q73.benchmark b/benchmark/tpcds/sf1-delta/q73.benchmark
new file mode 100644
index 0000000..2a4205b
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q73.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q73.benchmark
+# description: Run query 73 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=73
+QUERY_NUMBER_PADDED=73
diff --git a/benchmark/tpcds/sf1-delta/q74.benchmark b/benchmark/tpcds/sf1-delta/q74.benchmark
new file mode 100644
index 0000000..6b51bb4
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q74.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q74.benchmark
+# description: Run query 74 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=74
+QUERY_NUMBER_PADDED=74
diff --git a/benchmark/tpcds/sf1-delta/q75.benchmark b/benchmark/tpcds/sf1-delta/q75.benchmark
new file mode 100644
index 0000000..1ec1156
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q75.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q75.benchmark
+# description: Run query 75 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=75
+QUERY_NUMBER_PADDED=75
diff --git a/benchmark/tpcds/sf1-delta/q76.benchmark b/benchmark/tpcds/sf1-delta/q76.benchmark
new file mode 100644
index 0000000..1c50fa8
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q76.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q76.benchmark
+# description: Run query 76 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=76
+QUERY_NUMBER_PADDED=76
diff --git a/benchmark/tpcds/sf1-delta/q77.benchmark b/benchmark/tpcds/sf1-delta/q77.benchmark
new file mode 100644
index 0000000..d4751c3
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q77.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q77.benchmark
+# description: Run query 77 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=77
+QUERY_NUMBER_PADDED=77
diff --git a/benchmark/tpcds/sf1-delta/q78.benchmark b/benchmark/tpcds/sf1-delta/q78.benchmark
new file mode 100644
index 0000000..e820401
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q78.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q78.benchmark
+# description: Run query 78 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=78
+QUERY_NUMBER_PADDED=78
diff --git a/benchmark/tpcds/sf1-delta/q79.benchmark b/benchmark/tpcds/sf1-delta/q79.benchmark
new file mode 100644
index 0000000..58b820f
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q79.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q79.benchmark
+# description: Run query 79 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=79
+QUERY_NUMBER_PADDED=79
diff --git a/benchmark/tpcds/sf1-delta/q80.benchmark b/benchmark/tpcds/sf1-delta/q80.benchmark
new file mode 100644
index 0000000..1815a97
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q80.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q80.benchmark
+# description: Run query 80 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=80
+QUERY_NUMBER_PADDED=80
diff --git a/benchmark/tpcds/sf1-delta/q81.benchmark b/benchmark/tpcds/sf1-delta/q81.benchmark
new file mode 100644
index 0000000..9a07b52
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q81.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q81.benchmark
+# description: Run query 81 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=81
+QUERY_NUMBER_PADDED=81
diff --git a/benchmark/tpcds/sf1-delta/q82.benchmark b/benchmark/tpcds/sf1-delta/q82.benchmark
new file mode 100644
index 0000000..6c27933
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q82.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q82.benchmark
+# description: Run query 82 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=82
+QUERY_NUMBER_PADDED=82
diff --git a/benchmark/tpcds/sf1-delta/q83.benchmark b/benchmark/tpcds/sf1-delta/q83.benchmark
new file mode 100644
index 0000000..96dd1f3
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q83.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q83.benchmark
+# description: Run query 83 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=83
+QUERY_NUMBER_PADDED=83
diff --git a/benchmark/tpcds/sf1-delta/q84.benchmark b/benchmark/tpcds/sf1-delta/q84.benchmark
new file mode 100644
index 0000000..1c90d12
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q84.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q84.benchmark
+# description: Run query 84 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=84
+QUERY_NUMBER_PADDED=84
diff --git a/benchmark/tpcds/sf1-delta/q85.benchmark b/benchmark/tpcds/sf1-delta/q85.benchmark
new file mode 100644
index 0000000..9687a23
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q85.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q85.benchmark
+# description: Run query 85 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=85
+QUERY_NUMBER_PADDED=85
diff --git a/benchmark/tpcds/sf1-delta/q86.benchmark b/benchmark/tpcds/sf1-delta/q86.benchmark
new file mode 100644
index 0000000..b2c74c8
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q86.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q86.benchmark
+# description: Run query 86 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=86
+QUERY_NUMBER_PADDED=86
diff --git a/benchmark/tpcds/sf1-delta/q87.benchmark b/benchmark/tpcds/sf1-delta/q87.benchmark
new file mode 100644
index 0000000..95a55c5
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q87.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q87.benchmark
+# description: Run query 87 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=87
+QUERY_NUMBER_PADDED=87
diff --git a/benchmark/tpcds/sf1-delta/q88.benchmark b/benchmark/tpcds/sf1-delta/q88.benchmark
new file mode 100644
index 0000000..c6de497
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q88.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q88.benchmark
+# description: Run query 88 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=88
+QUERY_NUMBER_PADDED=88
diff --git a/benchmark/tpcds/sf1-delta/q89.benchmark b/benchmark/tpcds/sf1-delta/q89.benchmark
new file mode 100644
index 0000000..f7bb181
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q89.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q89.benchmark
+# description: Run query 89 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=89
+QUERY_NUMBER_PADDED=89
diff --git a/benchmark/tpcds/sf1-delta/q90.benchmark b/benchmark/tpcds/sf1-delta/q90.benchmark
new file mode 100644
index 0000000..b641fd9
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q90.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q90.benchmark
+# description: Run query 90 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=90
+QUERY_NUMBER_PADDED=90
diff --git a/benchmark/tpcds/sf1-delta/q91.benchmark b/benchmark/tpcds/sf1-delta/q91.benchmark
new file mode 100644
index 0000000..7e960d5
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q91.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q91.benchmark
+# description: Run query 91 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=91
+QUERY_NUMBER_PADDED=91
diff --git a/benchmark/tpcds/sf1-delta/q92.benchmark b/benchmark/tpcds/sf1-delta/q92.benchmark
new file mode 100644
index 0000000..6486179
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q92.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q92.benchmark
+# description: Run query 92 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=92
+QUERY_NUMBER_PADDED=92
diff --git a/benchmark/tpcds/sf1-delta/q93.benchmark b/benchmark/tpcds/sf1-delta/q93.benchmark
new file mode 100644
index 0000000..e42fded
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q93.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q93.benchmark
+# description: Run query 93 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=93
+QUERY_NUMBER_PADDED=93
diff --git a/benchmark/tpcds/sf1-delta/q94.benchmark b/benchmark/tpcds/sf1-delta/q94.benchmark
new file mode 100644
index 0000000..13bab12
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q94.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q94.benchmark
+# description: Run query 94 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=94
+QUERY_NUMBER_PADDED=94
diff --git a/benchmark/tpcds/sf1-delta/q95.benchmark b/benchmark/tpcds/sf1-delta/q95.benchmark
new file mode 100644
index 0000000..a6cffc8
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q95.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q95.benchmark
+# description: Run query 95 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=95
+QUERY_NUMBER_PADDED=95
diff --git a/benchmark/tpcds/sf1-delta/q96.benchmark b/benchmark/tpcds/sf1-delta/q96.benchmark
new file mode 100644
index 0000000..10cb8ad
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q96.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q96.benchmark
+# description: Run query 96 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=96
+QUERY_NUMBER_PADDED=96
diff --git a/benchmark/tpcds/sf1-delta/q97.benchmark b/benchmark/tpcds/sf1-delta/q97.benchmark
new file mode 100644
index 0000000..465959f
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q97.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q97.benchmark
+# description: Run query 97 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=97
+QUERY_NUMBER_PADDED=97
diff --git a/benchmark/tpcds/sf1-delta/q98.benchmark b/benchmark/tpcds/sf1-delta/q98.benchmark
new file mode 100644
index 0000000..d57b90c
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q98.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q98.benchmark
+# description: Run query 98 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=98
+QUERY_NUMBER_PADDED=98
diff --git a/benchmark/tpcds/sf1-delta/q99.benchmark b/benchmark/tpcds/sf1-delta/q99.benchmark
new file mode 100644
index 0000000..9a3cf32
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/q99.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q99.benchmark
+# description: Run query 99 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
+QUERY_NUMBER=99
+QUERY_NUMBER_PADDED=99
diff --git a/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
new file mode 100644
index 0000000..57d33d1
--- /dev/null
+++ b/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in
@@ -0,0 +1,17 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [tpcds-sf1]
+
+name DSQ${QUERY_NUMBER_PADDED}
+group tpcds
+subgroup sf1
+
+require delta
+
+require parquet
+
+load benchmark/tpcds/sf1-delta/load.sql
+
+run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
+
+result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv
diff --git a/benchmark/tpcds/sf1-parquet/load.sql b/benchmark/tpcds/sf1-parquet/load.sql
new file mode 100644
index 0000000..23e75d7
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/load.sql
@@ -0,0 +1,24 @@
+create view call_center as from parquet_scan('./data/generated/tpcds_sf1/call_center/parquet/**/*.parquet');
+create view catalog_page as from parquet_scan('./data/generated/tpcds_sf1/catalog_page/parquet/**/*.parquet');
+create view catalog_returns as from parquet_scan('./data/generated/tpcds_sf1/catalog_returns/parquet/**/*.parquet');
+create view catalog_sales as from parquet_scan('./data/generated/tpcds_sf1/catalog_sales/parquet/**/*.parquet');
+create view customer as from parquet_scan('./data/generated/tpcds_sf1/customer/parquet/**/*.parquet');
+create view customer_demographics as from parquet_scan('./data/generated/tpcds_sf1/customer_demographics/parquet/**/*.parquet');
+create view customer_address as from parquet_scan('./data/generated/tpcds_sf1/customer_address/parquet/**/*.parquet');
+create view date_dim as from parquet_scan('./data/generated/tpcds_sf1/date_dim/parquet/**/*.parquet');
+create view household_demographics as from parquet_scan('./data/generated/tpcds_sf1/household_demographics/parquet/**/*.parquet');
+create view inventory as from parquet_scan('./data/generated/tpcds_sf1/inventory/parquet/**/*.parquet');
+create view income_band as from parquet_scan('./data/generated/tpcds_sf1/income_band/parquet/**/*.parquet');
+create view item as from parquet_scan('./data/generated/tpcds_sf1/item/parquet/**/*.parquet');
+create view promotion as from parquet_scan('./data/generated/tpcds_sf1/promotion/parquet/**/*.parquet');
+create view reason as from parquet_scan('./data/generated/tpcds_sf1/reason/parquet/**/*.parquet');
+create view ship_mode as from parquet_scan('./data/generated/tpcds_sf1/ship_mode/parquet/**/*.parquet');
+create view store as from parquet_scan('./data/generated/tpcds_sf1/store/parquet/**/*.parquet');
+create view store_returns as from parquet_scan('./data/generated/tpcds_sf1/store_returns/parquet/**/*.parquet');
+create view store_sales as from parquet_scan('./data/generated/tpcds_sf1/store_sales/parquet/**/*.parquet');
+create view time_dim as from parquet_scan('./data/generated/tpcds_sf1/time_dim/parquet/**/*.parquet');
+create view warehouse as from parquet_scan('./data/generated/tpcds_sf1/warehouse/parquet/**/*.parquet');
+create view web_page as from parquet_scan('./data/generated/tpcds_sf1/web_page/parquet/**/*.parquet');
+create view web_returns as from parquet_scan('./data/generated/tpcds_sf1/web_returns/parquet/**/*.parquet');
+create view web_sales as from parquet_scan('./data/generated/tpcds_sf1/web_sales/parquet/**/*.parquet');
+create view web_site as from parquet_scan('./data/generated/tpcds_sf1/web_site/parquet/**/*.parquet');
\ No newline at end of file
diff --git a/benchmark/tpcds/sf1-parquet/q01.benchmark b/benchmark/tpcds/sf1-parquet/q01.benchmark
new file mode 100644
index 0000000..80cfeec
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q01.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q01.benchmark
+# description: Run query 01 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=1
+QUERY_NUMBER_PADDED=01
diff --git a/benchmark/tpcds/sf1-parquet/q02.benchmark b/benchmark/tpcds/sf1-parquet/q02.benchmark
new file mode 100644
index 0000000..fe7d0fd
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q02.benchmark
+# description: Run query 02 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/benchmark/tpcds/sf1-parquet/q03.benchmark b/benchmark/tpcds/sf1-parquet/q03.benchmark
new file mode 100644
index 0000000..214ed35
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q03.benchmark
+# description: Run query 03 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/benchmark/tpcds/sf1-parquet/q04.benchmark b/benchmark/tpcds/sf1-parquet/q04.benchmark
new file mode 100644
index 0000000..246e1fb
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q04.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q04.benchmark
+# description: Run query 04 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=4
+QUERY_NUMBER_PADDED=04
diff --git a/benchmark/tpcds/sf1-parquet/q05.benchmark b/benchmark/tpcds/sf1-parquet/q05.benchmark
new file mode 100644
index 0000000..1ea004b
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q05.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q05.benchmark
+# description: Run query 05 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=5
+QUERY_NUMBER_PADDED=05
diff --git a/benchmark/tpcds/sf1-parquet/q06.benchmark b/benchmark/tpcds/sf1-parquet/q06.benchmark
new file mode 100644
index 0000000..4f887b2
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q06.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q06.benchmark
+# description: Run query 06 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=6
+QUERY_NUMBER_PADDED=06
diff --git a/benchmark/tpcds/sf1-parquet/q07.benchmark b/benchmark/tpcds/sf1-parquet/q07.benchmark
new file mode 100644
index 0000000..1d116c9
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q07.benchmark
+# description: Run query 07 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/benchmark/tpcds/sf1-parquet/q08.benchmark b/benchmark/tpcds/sf1-parquet/q08.benchmark
new file mode 100644
index 0000000..2d0f9cb
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q08.benchmark
+# description: Run query 08 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/benchmark/tpcds/sf1-parquet/q09.benchmark b/benchmark/tpcds/sf1-parquet/q09.benchmark
new file mode 100644
index 0000000..d115826
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q09.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q09.benchmark
+# description: Run query 09 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=9
+QUERY_NUMBER_PADDED=09
diff --git a/benchmark/tpcds/sf1-parquet/q10.benchmark b/benchmark/tpcds/sf1-parquet/q10.benchmark
new file mode 100644
index 0000000..f4a048d
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q10.benchmark
+# description: Run query 10 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/benchmark/tpcds/sf1-parquet/q11.benchmark b/benchmark/tpcds/sf1-parquet/q11.benchmark
new file mode 100644
index 0000000..3c1f80a
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q11.benchmark
+# description: Run query 11 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/benchmark/tpcds/sf1-parquet/q12.benchmark b/benchmark/tpcds/sf1-parquet/q12.benchmark
new file mode 100644
index 0000000..4af68b7
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q12.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q12.benchmark
+# description: Run query 12 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=12
+QUERY_NUMBER_PADDED=12
diff --git a/benchmark/tpcds/sf1-parquet/q13.benchmark b/benchmark/tpcds/sf1-parquet/q13.benchmark
new file mode 100644
index 0000000..d4066d6
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q13.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q13.benchmark
+# description: Run query 13 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=13
+QUERY_NUMBER_PADDED=13
diff --git a/benchmark/tpcds/sf1-parquet/q14.benchmark b/benchmark/tpcds/sf1-parquet/q14.benchmark
new file mode 100644
index 0000000..af8071e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q14.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q14.benchmark
+# description: Run query 14 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=14
+QUERY_NUMBER_PADDED=14
diff --git a/benchmark/tpcds/sf1-parquet/q15.benchmark b/benchmark/tpcds/sf1-parquet/q15.benchmark
new file mode 100644
index 0000000..7ac477c
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q15.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q15.benchmark
+# description: Run query 15 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=15
+QUERY_NUMBER_PADDED=15
diff --git a/benchmark/tpcds/sf1-parquet/q16.benchmark b/benchmark/tpcds/sf1-parquet/q16.benchmark
new file mode 100644
index 0000000..e89fc35
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q16.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q16.benchmark
+# description: Run query 16 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=16
+QUERY_NUMBER_PADDED=16
diff --git a/benchmark/tpcds/sf1-parquet/q17.benchmark b/benchmark/tpcds/sf1-parquet/q17.benchmark
new file mode 100644
index 0000000..78af07a
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q17.benchmark
+# description: Run query 17 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/benchmark/tpcds/sf1-parquet/q18.benchmark b/benchmark/tpcds/sf1-parquet/q18.benchmark
new file mode 100644
index 0000000..c993e81
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q18.benchmark
+# description: Run query 18 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/benchmark/tpcds/sf1-parquet/q19.benchmark b/benchmark/tpcds/sf1-parquet/q19.benchmark
new file mode 100644
index 0000000..8781304
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q19.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q19.benchmark
+# description: Run query 19 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=19
+QUERY_NUMBER_PADDED=19
diff --git a/benchmark/tpcds/sf1-parquet/q20.benchmark b/benchmark/tpcds/sf1-parquet/q20.benchmark
new file mode 100644
index 0000000..eabd4f6
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q20.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q20.benchmark
+# description: Run query 20 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=20
+QUERY_NUMBER_PADDED=20
diff --git a/benchmark/tpcds/sf1-parquet/q21.benchmark b/benchmark/tpcds/sf1-parquet/q21.benchmark
new file mode 100644
index 0000000..acf7b1c
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q21.benchmark
+# description: Run query 21 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/benchmark/tpcds/sf1-parquet/q22.benchmark b/benchmark/tpcds/sf1-parquet/q22.benchmark
new file mode 100644
index 0000000..1b1b008
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q22.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q22.benchmark
+# description: Run query 22 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=22
+QUERY_NUMBER_PADDED=22
diff --git a/benchmark/tpcds/sf1-parquet/q23.benchmark b/benchmark/tpcds/sf1-parquet/q23.benchmark
new file mode 100644
index 0000000..52f434f
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q23.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q23.benchmark
+# description: Run query 23 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=23
+QUERY_NUMBER_PADDED=23
diff --git a/benchmark/tpcds/sf1-parquet/q24.benchmark b/benchmark/tpcds/sf1-parquet/q24.benchmark
new file mode 100644
index 0000000..17b6627
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q24.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q24.benchmark
+# description: Run query 24 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=24
+QUERY_NUMBER_PADDED=24
diff --git a/benchmark/tpcds/sf1-parquet/q25.benchmark b/benchmark/tpcds/sf1-parquet/q25.benchmark
new file mode 100644
index 0000000..b349885
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q25.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q25.benchmark
+# description: Run query 25 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=25
+QUERY_NUMBER_PADDED=25
diff --git a/benchmark/tpcds/sf1-parquet/q26.benchmark b/benchmark/tpcds/sf1-parquet/q26.benchmark
new file mode 100644
index 0000000..4ff955e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q26.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q26.benchmark
+# description: Run query 26 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=26
+QUERY_NUMBER_PADDED=26
diff --git a/benchmark/tpcds/sf1-parquet/q27.benchmark b/benchmark/tpcds/sf1-parquet/q27.benchmark
new file mode 100644
index 0000000..05a2dae
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q27.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q27.benchmark
+# description: Run query 27 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=27
+QUERY_NUMBER_PADDED=27
diff --git a/benchmark/tpcds/sf1-parquet/q28.benchmark b/benchmark/tpcds/sf1-parquet/q28.benchmark
new file mode 100644
index 0000000..cbc909e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q28.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q28.benchmark
+# description: Run query 28 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=28
+QUERY_NUMBER_PADDED=28
diff --git a/benchmark/tpcds/sf1-parquet/q29.benchmark b/benchmark/tpcds/sf1-parquet/q29.benchmark
new file mode 100644
index 0000000..922a7cc
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q29.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q29.benchmark
+# description: Run query 29 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=29
+QUERY_NUMBER_PADDED=29
diff --git a/benchmark/tpcds/sf1-parquet/q30.benchmark b/benchmark/tpcds/sf1-parquet/q30.benchmark
new file mode 100644
index 0000000..e610adc
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q30.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q30.benchmark
+# description: Run query 30 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=30
+QUERY_NUMBER_PADDED=30
diff --git a/benchmark/tpcds/sf1-parquet/q31.benchmark b/benchmark/tpcds/sf1-parquet/q31.benchmark
new file mode 100644
index 0000000..d08908e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q31.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q31.benchmark
+# description: Run query 31 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=31
+QUERY_NUMBER_PADDED=31
diff --git a/benchmark/tpcds/sf1-parquet/q32.benchmark b/benchmark/tpcds/sf1-parquet/q32.benchmark
new file mode 100644
index 0000000..50d6382
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q32.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q32.benchmark
+# description: Run query 32 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=32
+QUERY_NUMBER_PADDED=32
diff --git a/benchmark/tpcds/sf1-parquet/q33.benchmark b/benchmark/tpcds/sf1-parquet/q33.benchmark
new file mode 100644
index 0000000..b74cc23
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q33.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q33.benchmark
+# description: Run query 33 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=33
+QUERY_NUMBER_PADDED=33
diff --git a/benchmark/tpcds/sf1-parquet/q34.benchmark b/benchmark/tpcds/sf1-parquet/q34.benchmark
new file mode 100644
index 0000000..10887ab
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q34.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q34.benchmark
+# description: Run query 34 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=34
+QUERY_NUMBER_PADDED=34
diff --git a/benchmark/tpcds/sf1-parquet/q35.benchmark b/benchmark/tpcds/sf1-parquet/q35.benchmark
new file mode 100644
index 0000000..871d4a5
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q35.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q35.benchmark
+# description: Run query 35 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=35
+QUERY_NUMBER_PADDED=35
diff --git a/benchmark/tpcds/sf1-parquet/q36.benchmark b/benchmark/tpcds/sf1-parquet/q36.benchmark
new file mode 100644
index 0000000..134c567
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q36.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q36.benchmark
+# description: Run query 36 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=36
+QUERY_NUMBER_PADDED=36
diff --git a/benchmark/tpcds/sf1-parquet/q37.benchmark b/benchmark/tpcds/sf1-parquet/q37.benchmark
new file mode 100644
index 0000000..5851965
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q37.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q37.benchmark
+# description: Run query 37 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=37
+QUERY_NUMBER_PADDED=37
diff --git a/benchmark/tpcds/sf1-parquet/q38.benchmark b/benchmark/tpcds/sf1-parquet/q38.benchmark
new file mode 100644
index 0000000..498f842
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q38.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q38.benchmark
+# description: Run query 38 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=38
+QUERY_NUMBER_PADDED=38
diff --git a/benchmark/tpcds/sf1-parquet/q39.benchmark b/benchmark/tpcds/sf1-parquet/q39.benchmark
new file mode 100644
index 0000000..e7b00bd
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q39.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q39.benchmark
+# description: Run query 39 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=39
+QUERY_NUMBER_PADDED=39
diff --git a/benchmark/tpcds/sf1-parquet/q40.benchmark b/benchmark/tpcds/sf1-parquet/q40.benchmark
new file mode 100644
index 0000000..e2463d6
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q40.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q40.benchmark
+# description: Run query 40 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=40
+QUERY_NUMBER_PADDED=40
diff --git a/benchmark/tpcds/sf1-parquet/q41.benchmark b/benchmark/tpcds/sf1-parquet/q41.benchmark
new file mode 100644
index 0000000..6577235
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q41.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q41.benchmark
+# description: Run query 41 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=41
+QUERY_NUMBER_PADDED=41
diff --git a/benchmark/tpcds/sf1-parquet/q42.benchmark b/benchmark/tpcds/sf1-parquet/q42.benchmark
new file mode 100644
index 0000000..933d648
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q42.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q42.benchmark
+# description: Run query 42 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=42
+QUERY_NUMBER_PADDED=42
diff --git a/benchmark/tpcds/sf1-parquet/q43.benchmark b/benchmark/tpcds/sf1-parquet/q43.benchmark
new file mode 100644
index 0000000..b60ff92
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q43.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q43.benchmark
+# description: Run query 43 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=43
+QUERY_NUMBER_PADDED=43
diff --git a/benchmark/tpcds/sf1-parquet/q44.benchmark b/benchmark/tpcds/sf1-parquet/q44.benchmark
new file mode 100644
index 0000000..8baefdb
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q44.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q44.benchmark
+# description: Run query 44 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=44
+QUERY_NUMBER_PADDED=44
diff --git a/benchmark/tpcds/sf1-parquet/q45.benchmark b/benchmark/tpcds/sf1-parquet/q45.benchmark
new file mode 100644
index 0000000..8e6ecd5
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q45.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q45.benchmark
+# description: Run query 45 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=45
+QUERY_NUMBER_PADDED=45
diff --git a/benchmark/tpcds/sf1-parquet/q46.benchmark b/benchmark/tpcds/sf1-parquet/q46.benchmark
new file mode 100644
index 0000000..2b266c8
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q46.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q46.benchmark
+# description: Run query 46 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=46
+QUERY_NUMBER_PADDED=46
diff --git a/benchmark/tpcds/sf1-parquet/q47.benchmark b/benchmark/tpcds/sf1-parquet/q47.benchmark
new file mode 100644
index 0000000..684daef
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q47.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q47.benchmark
+# description: Run query 47 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=47
+QUERY_NUMBER_PADDED=47
diff --git a/benchmark/tpcds/sf1-parquet/q48.benchmark b/benchmark/tpcds/sf1-parquet/q48.benchmark
new file mode 100644
index 0000000..5452cff
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q48.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q48.benchmark
+# description: Run query 48 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=48
+QUERY_NUMBER_PADDED=48
diff --git a/benchmark/tpcds/sf1-parquet/q49.benchmark b/benchmark/tpcds/sf1-parquet/q49.benchmark
new file mode 100644
index 0000000..dbbc8bf
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q49.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q49.benchmark
+# description: Run query 49 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=49
+QUERY_NUMBER_PADDED=49
diff --git a/benchmark/tpcds/sf1-parquet/q50.benchmark b/benchmark/tpcds/sf1-parquet/q50.benchmark
new file mode 100644
index 0000000..37e0ba3
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q50.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q50.benchmark
+# description: Run query 50 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=50
+QUERY_NUMBER_PADDED=50
diff --git a/benchmark/tpcds/sf1-parquet/q51.benchmark b/benchmark/tpcds/sf1-parquet/q51.benchmark
new file mode 100644
index 0000000..7b8e96d
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q51.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q51.benchmark
+# description: Run query 51 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=51
+QUERY_NUMBER_PADDED=51
diff --git a/benchmark/tpcds/sf1-parquet/q52.benchmark b/benchmark/tpcds/sf1-parquet/q52.benchmark
new file mode 100644
index 0000000..a4aca4c
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q52.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q52.benchmark
+# description: Run query 52 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=52
+QUERY_NUMBER_PADDED=52
diff --git a/benchmark/tpcds/sf1-parquet/q53.benchmark b/benchmark/tpcds/sf1-parquet/q53.benchmark
new file mode 100644
index 0000000..80c1f6f
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q53.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q53.benchmark
+# description: Run query 53 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=53
+QUERY_NUMBER_PADDED=53
diff --git a/benchmark/tpcds/sf1-parquet/q54.benchmark b/benchmark/tpcds/sf1-parquet/q54.benchmark
new file mode 100644
index 0000000..496eb9e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q54.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q54.benchmark
+# description: Run query 54 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=54
+QUERY_NUMBER_PADDED=54
diff --git a/benchmark/tpcds/sf1-parquet/q55.benchmark b/benchmark/tpcds/sf1-parquet/q55.benchmark
new file mode 100644
index 0000000..1117fac
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q55.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q55.benchmark
+# description: Run query 55 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=55
+QUERY_NUMBER_PADDED=55
diff --git a/benchmark/tpcds/sf1-parquet/q56.benchmark b/benchmark/tpcds/sf1-parquet/q56.benchmark
new file mode 100644
index 0000000..43c423f
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q56.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q56.benchmark
+# description: Run query 56 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=56
+QUERY_NUMBER_PADDED=56
diff --git a/benchmark/tpcds/sf1-parquet/q57.benchmark b/benchmark/tpcds/sf1-parquet/q57.benchmark
new file mode 100644
index 0000000..25b36ed
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q57.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q57.benchmark
+# description: Run query 57 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=57
+QUERY_NUMBER_PADDED=57
diff --git a/benchmark/tpcds/sf1-parquet/q58.benchmark b/benchmark/tpcds/sf1-parquet/q58.benchmark
new file mode 100644
index 0000000..b60ebfc
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q58.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q58.benchmark
+# description: Run query 58 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=58
+QUERY_NUMBER_PADDED=58
diff --git a/benchmark/tpcds/sf1-parquet/q59.benchmark b/benchmark/tpcds/sf1-parquet/q59.benchmark
new file mode 100644
index 0000000..296ac30
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q59.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q59.benchmark
+# description: Run query 59 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=59
+QUERY_NUMBER_PADDED=59
diff --git a/benchmark/tpcds/sf1-parquet/q60.benchmark b/benchmark/tpcds/sf1-parquet/q60.benchmark
new file mode 100644
index 0000000..a383742
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q60.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q60.benchmark
+# description: Run query 60 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=60
+QUERY_NUMBER_PADDED=60
diff --git a/benchmark/tpcds/sf1-parquet/q61.benchmark b/benchmark/tpcds/sf1-parquet/q61.benchmark
new file mode 100644
index 0000000..794f3de
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q61.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q61.benchmark
+# description: Run query 61 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=61
+QUERY_NUMBER_PADDED=61
diff --git a/benchmark/tpcds/sf1-parquet/q62.benchmark b/benchmark/tpcds/sf1-parquet/q62.benchmark
new file mode 100644
index 0000000..01e52ac
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q62.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q62.benchmark
+# description: Run query 62 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=62
+QUERY_NUMBER_PADDED=62
diff --git a/benchmark/tpcds/sf1-parquet/q63.benchmark b/benchmark/tpcds/sf1-parquet/q63.benchmark
new file mode 100644
index 0000000..2027bf4
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q63.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q63.benchmark
+# description: Run query 63 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=63
+QUERY_NUMBER_PADDED=63
diff --git a/benchmark/tpcds/sf1-parquet/q64.benchmark b/benchmark/tpcds/sf1-parquet/q64.benchmark
new file mode 100644
index 0000000..0e7178e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q64.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q64.benchmark
+# description: Run query 64 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=64
+QUERY_NUMBER_PADDED=64
diff --git a/benchmark/tpcds/sf1-parquet/q65.benchmark b/benchmark/tpcds/sf1-parquet/q65.benchmark
new file mode 100644
index 0000000..b3f6ea2
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q65.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q65.benchmark
+# description: Run query 65 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=65
+QUERY_NUMBER_PADDED=65
diff --git a/benchmark/tpcds/sf1-parquet/q66.benchmark b/benchmark/tpcds/sf1-parquet/q66.benchmark
new file mode 100644
index 0000000..479dfb5
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q66.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q66.benchmark
+# description: Run query 66 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=66
+QUERY_NUMBER_PADDED=66
diff --git a/benchmark/tpcds/sf1-parquet/q67.benchmark b/benchmark/tpcds/sf1-parquet/q67.benchmark
new file mode 100644
index 0000000..3971bc0
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q67.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q67.benchmark
+# description: Run query 67 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=67
+QUERY_NUMBER_PADDED=67
diff --git a/benchmark/tpcds/sf1-parquet/q68.benchmark b/benchmark/tpcds/sf1-parquet/q68.benchmark
new file mode 100644
index 0000000..80b5bd3
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q68.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q68.benchmark
+# description: Run query 68 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=68
+QUERY_NUMBER_PADDED=68
diff --git a/benchmark/tpcds/sf1-parquet/q69.benchmark b/benchmark/tpcds/sf1-parquet/q69.benchmark
new file mode 100644
index 0000000..20cbc5b
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q69.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q69.benchmark
+# description: Run query 69 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=69
+QUERY_NUMBER_PADDED=69
diff --git a/benchmark/tpcds/sf1-parquet/q70.benchmark b/benchmark/tpcds/sf1-parquet/q70.benchmark
new file mode 100644
index 0000000..64590ae
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q70.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q70.benchmark
+# description: Run query 70 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=70
+QUERY_NUMBER_PADDED=70
diff --git a/benchmark/tpcds/sf1-parquet/q71.benchmark b/benchmark/tpcds/sf1-parquet/q71.benchmark
new file mode 100644
index 0000000..bfb61bf
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q71.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q71.benchmark
+# description: Run query 71 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=71
+QUERY_NUMBER_PADDED=71
diff --git a/benchmark/tpcds/sf1-parquet/q72.benchmark b/benchmark/tpcds/sf1-parquet/q72.benchmark
new file mode 100644
index 0000000..8cab000
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q72.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q72.benchmark
+# description: Run query 72 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=72
+QUERY_NUMBER_PADDED=72
diff --git a/benchmark/tpcds/sf1-parquet/q73.benchmark b/benchmark/tpcds/sf1-parquet/q73.benchmark
new file mode 100644
index 0000000..23a8ea4
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q73.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q73.benchmark
+# description: Run query 73 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=73
+QUERY_NUMBER_PADDED=73
diff --git a/benchmark/tpcds/sf1-parquet/q74.benchmark b/benchmark/tpcds/sf1-parquet/q74.benchmark
new file mode 100644
index 0000000..883c44d
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q74.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q74.benchmark
+# description: Run query 74 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=74
+QUERY_NUMBER_PADDED=74
diff --git a/benchmark/tpcds/sf1-parquet/q75.benchmark b/benchmark/tpcds/sf1-parquet/q75.benchmark
new file mode 100644
index 0000000..6ab4a04
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q75.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q75.benchmark
+# description: Run query 75 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=75
+QUERY_NUMBER_PADDED=75
diff --git a/benchmark/tpcds/sf1-parquet/q76.benchmark b/benchmark/tpcds/sf1-parquet/q76.benchmark
new file mode 100644
index 0000000..ddb1f4e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q76.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q76.benchmark
+# description: Run query 76 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=76
+QUERY_NUMBER_PADDED=76
diff --git a/benchmark/tpcds/sf1-parquet/q77.benchmark b/benchmark/tpcds/sf1-parquet/q77.benchmark
new file mode 100644
index 0000000..f69818e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q77.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q77.benchmark
+# description: Run query 77 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=77
+QUERY_NUMBER_PADDED=77
diff --git a/benchmark/tpcds/sf1-parquet/q78.benchmark b/benchmark/tpcds/sf1-parquet/q78.benchmark
new file mode 100644
index 0000000..9156c52
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q78.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q78.benchmark
+# description: Run query 78 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=78
+QUERY_NUMBER_PADDED=78
diff --git a/benchmark/tpcds/sf1-parquet/q79.benchmark b/benchmark/tpcds/sf1-parquet/q79.benchmark
new file mode 100644
index 0000000..ae54bba
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q79.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q79.benchmark
+# description: Run query 79 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=79
+QUERY_NUMBER_PADDED=79
diff --git a/benchmark/tpcds/sf1-parquet/q80.benchmark b/benchmark/tpcds/sf1-parquet/q80.benchmark
new file mode 100644
index 0000000..282c8d9
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q80.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q80.benchmark
+# description: Run query 80 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=80
+QUERY_NUMBER_PADDED=80
diff --git a/benchmark/tpcds/sf1-parquet/q81.benchmark b/benchmark/tpcds/sf1-parquet/q81.benchmark
new file mode 100644
index 0000000..b33e8e7
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q81.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q81.benchmark
+# description: Run query 81 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=81
+QUERY_NUMBER_PADDED=81
diff --git a/benchmark/tpcds/sf1-parquet/q82.benchmark b/benchmark/tpcds/sf1-parquet/q82.benchmark
new file mode 100644
index 0000000..b74d67d
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q82.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q82.benchmark
+# description: Run query 82 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=82
+QUERY_NUMBER_PADDED=82
diff --git a/benchmark/tpcds/sf1-parquet/q83.benchmark b/benchmark/tpcds/sf1-parquet/q83.benchmark
new file mode 100644
index 0000000..79a4221
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q83.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q83.benchmark
+# description: Run query 83 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=83
+QUERY_NUMBER_PADDED=83
diff --git a/benchmark/tpcds/sf1-parquet/q84.benchmark b/benchmark/tpcds/sf1-parquet/q84.benchmark
new file mode 100644
index 0000000..61b4ba0
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q84.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q84.benchmark
+# description: Run query 84 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=84
+QUERY_NUMBER_PADDED=84
diff --git a/benchmark/tpcds/sf1-parquet/q85.benchmark b/benchmark/tpcds/sf1-parquet/q85.benchmark
new file mode 100644
index 0000000..19458e3
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q85.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q85.benchmark
+# description: Run query 85 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=85
+QUERY_NUMBER_PADDED=85
diff --git a/benchmark/tpcds/sf1-parquet/q86.benchmark b/benchmark/tpcds/sf1-parquet/q86.benchmark
new file mode 100644
index 0000000..6b687fc
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q86.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q86.benchmark
+# description: Run query 86 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=86
+QUERY_NUMBER_PADDED=86
diff --git a/benchmark/tpcds/sf1-parquet/q87.benchmark b/benchmark/tpcds/sf1-parquet/q87.benchmark
new file mode 100644
index 0000000..d1c4a02
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q87.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q87.benchmark
+# description: Run query 87 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=87
+QUERY_NUMBER_PADDED=87
diff --git a/benchmark/tpcds/sf1-parquet/q88.benchmark b/benchmark/tpcds/sf1-parquet/q88.benchmark
new file mode 100644
index 0000000..758597f
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q88.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q88.benchmark
+# description: Run query 88 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=88
+QUERY_NUMBER_PADDED=88
diff --git a/benchmark/tpcds/sf1-parquet/q89.benchmark b/benchmark/tpcds/sf1-parquet/q89.benchmark
new file mode 100644
index 0000000..626a3f0
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q89.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q89.benchmark
+# description: Run query 89 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=89
+QUERY_NUMBER_PADDED=89
diff --git a/benchmark/tpcds/sf1-parquet/q90.benchmark b/benchmark/tpcds/sf1-parquet/q90.benchmark
new file mode 100644
index 0000000..f88d96d
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q90.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q90.benchmark
+# description: Run query 90 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=90
+QUERY_NUMBER_PADDED=90
diff --git a/benchmark/tpcds/sf1-parquet/q91.benchmark b/benchmark/tpcds/sf1-parquet/q91.benchmark
new file mode 100644
index 0000000..85d5ef8
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q91.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q91.benchmark
+# description: Run query 91 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=91
+QUERY_NUMBER_PADDED=91
diff --git a/benchmark/tpcds/sf1-parquet/q92.benchmark b/benchmark/tpcds/sf1-parquet/q92.benchmark
new file mode 100644
index 0000000..880af0b
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q92.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q92.benchmark
+# description: Run query 92 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=92
+QUERY_NUMBER_PADDED=92
diff --git a/benchmark/tpcds/sf1-parquet/q93.benchmark b/benchmark/tpcds/sf1-parquet/q93.benchmark
new file mode 100644
index 0000000..1b5c5f6
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q93.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q93.benchmark
+# description: Run query 93 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=93
+QUERY_NUMBER_PADDED=93
diff --git a/benchmark/tpcds/sf1-parquet/q94.benchmark b/benchmark/tpcds/sf1-parquet/q94.benchmark
new file mode 100644
index 0000000..46886f2
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q94.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q94.benchmark
+# description: Run query 94 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=94
+QUERY_NUMBER_PADDED=94
diff --git a/benchmark/tpcds/sf1-parquet/q95.benchmark b/benchmark/tpcds/sf1-parquet/q95.benchmark
new file mode 100644
index 0000000..35b283e
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q95.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q95.benchmark
+# description: Run query 95 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=95
+QUERY_NUMBER_PADDED=95
diff --git a/benchmark/tpcds/sf1-parquet/q96.benchmark b/benchmark/tpcds/sf1-parquet/q96.benchmark
new file mode 100644
index 0000000..2955000
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q96.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q96.benchmark
+# description: Run query 96 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=96
+QUERY_NUMBER_PADDED=96
diff --git a/benchmark/tpcds/sf1-parquet/q97.benchmark b/benchmark/tpcds/sf1-parquet/q97.benchmark
new file mode 100644
index 0000000..1d2aa64
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q97.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q97.benchmark
+# description: Run query 97 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=97
+QUERY_NUMBER_PADDED=97
diff --git a/benchmark/tpcds/sf1-parquet/q98.benchmark b/benchmark/tpcds/sf1-parquet/q98.benchmark
new file mode 100644
index 0000000..c36e0e6
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q98.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q98.benchmark
+# description: Run query 98 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=98
+QUERY_NUMBER_PADDED=98
diff --git a/benchmark/tpcds/sf1-parquet/q99.benchmark b/benchmark/tpcds/sf1-parquet/q99.benchmark
new file mode 100644
index 0000000..2e6d066
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/q99.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpcds/sf1/q99.benchmark
+# description: Run query 99 from the TPC-DS benchmark
+# group: [sf1]
+
+template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
+QUERY_NUMBER=99
+QUERY_NUMBER_PADDED=99
diff --git a/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
new file mode 100644
index 0000000..37dbc34
--- /dev/null
+++ b/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in
@@ -0,0 +1,17 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [tpcds-sf1]
+
+name DSQ${QUERY_NUMBER_PADDED}
+group tpcds
+subgroup sf1
+
+require delta
+
+require parquet
+
+load benchmark/tpcds/sf1-parquet/load.sql
+
+run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql
+
+result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv
diff --git a/benchmark/tpch/sf1-delta-remote/load.sql b/benchmark/tpch/sf1-delta-remote/load.sql
new file mode 100644
index 0000000..a095ffd
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/load.sql
@@ -0,0 +1,8 @@
+create view customer as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/delta_lake');
+create view lineitem as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' ||  '/lineitem/delta_lake');
+create view nation as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/delta_lake');
+create view orders as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/orders/delta_lake');
+create view part as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/delta_lake');
+create view partsupp as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/delta_lake');
+create view region as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/delta_lake');
+create view supplier as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/delta_lake');
\ No newline at end of file
diff --git a/benchmark/tpch/sf1-delta-remote/q01.benchmark b/benchmark/tpch/sf1-delta-remote/q01.benchmark
new file mode 100644
index 0000000..df280b4
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q01.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q01.benchmark
+# description: Run query 01 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=1
+QUERY_NUMBER_PADDED=01
diff --git a/benchmark/tpch/sf1-delta-remote/q02.benchmark b/benchmark/tpch/sf1-delta-remote/q02.benchmark
new file mode 100644
index 0000000..94b9891
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q02.benchmark
+# description: Run query 02 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/benchmark/tpch/sf1-delta-remote/q03.benchmark b/benchmark/tpch/sf1-delta-remote/q03.benchmark
new file mode 100644
index 0000000..6a63a9a
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q03.benchmark
+# description: Run query 03 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/benchmark/tpch/sf1-delta-remote/q04.benchmark b/benchmark/tpch/sf1-delta-remote/q04.benchmark
new file mode 100644
index 0000000..7049851
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q04.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q04.benchmark
+# description: Run query 04 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=4
+QUERY_NUMBER_PADDED=04
diff --git a/benchmark/tpch/sf1-delta-remote/q05.benchmark b/benchmark/tpch/sf1-delta-remote/q05.benchmark
new file mode 100644
index 0000000..4f93fb7
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q05.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q05.benchmark
+# description: Run query 05 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=5
+QUERY_NUMBER_PADDED=05
diff --git a/benchmark/tpch/sf1-delta-remote/q06.benchmark b/benchmark/tpch/sf1-delta-remote/q06.benchmark
new file mode 100644
index 0000000..78854bb
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q06.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q06.benchmark
+# description: Run query 06 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=6
+QUERY_NUMBER_PADDED=06
diff --git a/benchmark/tpch/sf1-delta-remote/q07.benchmark b/benchmark/tpch/sf1-delta-remote/q07.benchmark
new file mode 100644
index 0000000..9bf2a7b
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q07.benchmark
+# description: Run query 07 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/benchmark/tpch/sf1-delta-remote/q08.benchmark b/benchmark/tpch/sf1-delta-remote/q08.benchmark
new file mode 100644
index 0000000..a7f1a80
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q08.benchmark
+# description: Run query 08 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/benchmark/tpch/sf1-delta-remote/q09.benchmark b/benchmark/tpch/sf1-delta-remote/q09.benchmark
new file mode 100644
index 0000000..272167a
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q09.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q09.benchmark
+# description: Run query 09 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=9
+QUERY_NUMBER_PADDED=09
diff --git a/benchmark/tpch/sf1-delta-remote/q10.benchmark b/benchmark/tpch/sf1-delta-remote/q10.benchmark
new file mode 100644
index 0000000..14822e3
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q10.benchmark
+# description: Run query 10 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/benchmark/tpch/sf1-delta-remote/q11.benchmark b/benchmark/tpch/sf1-delta-remote/q11.benchmark
new file mode 100644
index 0000000..df5276e
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q11.benchmark
+# description: Run query 11 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/benchmark/tpch/sf1-delta-remote/q12.benchmark b/benchmark/tpch/sf1-delta-remote/q12.benchmark
new file mode 100644
index 0000000..fee7641
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q12.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q12.benchmark
+# description: Run query 12 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=12
+QUERY_NUMBER_PADDED=12
diff --git a/benchmark/tpch/sf1-delta-remote/q13.benchmark b/benchmark/tpch/sf1-delta-remote/q13.benchmark
new file mode 100644
index 0000000..2050dd4
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q13.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q13.benchmark
+# description: Run query 13 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=13
+QUERY_NUMBER_PADDED=13
diff --git a/benchmark/tpch/sf1-delta-remote/q14.benchmark b/benchmark/tpch/sf1-delta-remote/q14.benchmark
new file mode 100644
index 0000000..e458993
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q14.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q14.benchmark
+# description: Run query 14 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=14
+QUERY_NUMBER_PADDED=14
diff --git a/benchmark/tpch/sf1-delta-remote/q15.benchmark b/benchmark/tpch/sf1-delta-remote/q15.benchmark
new file mode 100644
index 0000000..fcd0523
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q15.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q15.benchmark
+# description: Run query 15 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=15
+QUERY_NUMBER_PADDED=15
diff --git a/benchmark/tpch/sf1-delta-remote/q16.benchmark b/benchmark/tpch/sf1-delta-remote/q16.benchmark
new file mode 100644
index 0000000..a7524ad
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q16.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q16.benchmark
+# description: Run query 16 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=16
+QUERY_NUMBER_PADDED=16
diff --git a/benchmark/tpch/sf1-delta-remote/q17.benchmark b/benchmark/tpch/sf1-delta-remote/q17.benchmark
new file mode 100644
index 0000000..05c9584
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q17.benchmark
+# description: Run query 17 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/benchmark/tpch/sf1-delta-remote/q18.benchmark b/benchmark/tpch/sf1-delta-remote/q18.benchmark
new file mode 100644
index 0000000..a645646
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q18.benchmark
+# description: Run query 18 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/benchmark/tpch/sf1-delta-remote/q19.benchmark b/benchmark/tpch/sf1-delta-remote/q19.benchmark
new file mode 100644
index 0000000..3bc9baf
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q19.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q19.benchmark
+# description: Run query 19 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=19
+QUERY_NUMBER_PADDED=19
diff --git a/benchmark/tpch/sf1-delta-remote/q20.benchmark b/benchmark/tpch/sf1-delta-remote/q20.benchmark
new file mode 100644
index 0000000..766e1d2
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q20.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q20.benchmark
+# description: Run query 20 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=20
+QUERY_NUMBER_PADDED=20
diff --git a/benchmark/tpch/sf1-delta-remote/q21.benchmark b/benchmark/tpch/sf1-delta-remote/q21.benchmark
new file mode 100644
index 0000000..6d75456
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q21.benchmark
+# description: Run query 21 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/benchmark/tpch/sf1-delta-remote/q22.benchmark b/benchmark/tpch/sf1-delta-remote/q22.benchmark
new file mode 100644
index 0000000..c808aad
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/q22.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q22.benchmark
+# description: Run query 22 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=22
+QUERY_NUMBER_PADDED=22
diff --git a/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
new file mode 100644
index 0000000..c94ee1c
--- /dev/null
+++ b/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in
@@ -0,0 +1,19 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [sf1]
+
+name Q${QUERY_NUMBER_PADDED}
+group tpch
+subgroup sf1
+
+require delta
+
+require parquet
+
+require httpfs
+
+load benchmark/tpch/sf1-delta-remote/load.sql
+
+run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql
+
+result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv
\ No newline at end of file
diff --git a/benchmark/tpch/sf1-delta/load.sql b/benchmark/tpch/sf1-delta/load.sql
new file mode 100644
index 0000000..5d97a08
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/load.sql
@@ -0,0 +1,8 @@
+create view customer as from delta_scan('./data/generated/tpch_sf1/customer/delta_lake');
+create view lineitem as from delta_scan('./data/generated/tpch_sf1/lineitem/delta_lake');
+create view nation as from delta_scan('./data/generated/tpch_sf1/nation/delta_lake');
+create view orders as from delta_scan('./data/generated/tpch_sf1/orders/delta_lake');
+create view part as from delta_scan('./data/generated/tpch_sf1/part/delta_lake');
+create view partsupp as from delta_scan('./data/generated/tpch_sf1/partsupp/delta_lake');
+create view region as from delta_scan('./data/generated/tpch_sf1/region/delta_lake');
+create view supplier as from delta_scan('./data/generated/tpch_sf1/supplier/delta_lake');
\ No newline at end of file
diff --git a/benchmark/tpch/sf1-delta/q01.benchmark b/benchmark/tpch/sf1-delta/q01.benchmark
new file mode 100644
index 0000000..c002271
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q01.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q01.benchmark
+# description: Run query 01 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=1
+QUERY_NUMBER_PADDED=01
diff --git a/benchmark/tpch/sf1-delta/q02.benchmark b/benchmark/tpch/sf1-delta/q02.benchmark
new file mode 100644
index 0000000..1b29623
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q02.benchmark
+# description: Run query 02 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/benchmark/tpch/sf1-delta/q03.benchmark b/benchmark/tpch/sf1-delta/q03.benchmark
new file mode 100644
index 0000000..105db6c
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q03.benchmark
+# description: Run query 03 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/benchmark/tpch/sf1-delta/q04.benchmark b/benchmark/tpch/sf1-delta/q04.benchmark
new file mode 100644
index 0000000..e6adaa9
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q04.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q04.benchmark
+# description: Run query 04 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=4
+QUERY_NUMBER_PADDED=04
diff --git a/benchmark/tpch/sf1-delta/q05.benchmark b/benchmark/tpch/sf1-delta/q05.benchmark
new file mode 100644
index 0000000..c3d58fa
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q05.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q05.benchmark
+# description: Run query 05 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=5
+QUERY_NUMBER_PADDED=05
diff --git a/benchmark/tpch/sf1-delta/q06.benchmark b/benchmark/tpch/sf1-delta/q06.benchmark
new file mode 100644
index 0000000..f0f4edf
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q06.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q06.benchmark
+# description: Run query 06 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=6
+QUERY_NUMBER_PADDED=06
diff --git a/benchmark/tpch/sf1-delta/q07.benchmark b/benchmark/tpch/sf1-delta/q07.benchmark
new file mode 100644
index 0000000..ef73acb
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q07.benchmark
+# description: Run query 07 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/benchmark/tpch/sf1-delta/q08.benchmark b/benchmark/tpch/sf1-delta/q08.benchmark
new file mode 100644
index 0000000..ea07628
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q08.benchmark
+# description: Run query 08 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/benchmark/tpch/sf1-delta/q09.benchmark b/benchmark/tpch/sf1-delta/q09.benchmark
new file mode 100644
index 0000000..d56ce49
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q09.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q09.benchmark
+# description: Run query 09 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=9
+QUERY_NUMBER_PADDED=09
diff --git a/benchmark/tpch/sf1-delta/q10.benchmark b/benchmark/tpch/sf1-delta/q10.benchmark
new file mode 100644
index 0000000..c288da4
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q10.benchmark
+# description: Run query 10 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/benchmark/tpch/sf1-delta/q11.benchmark b/benchmark/tpch/sf1-delta/q11.benchmark
new file mode 100644
index 0000000..67840f1
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q11.benchmark
+# description: Run query 11 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/benchmark/tpch/sf1-delta/q12.benchmark b/benchmark/tpch/sf1-delta/q12.benchmark
new file mode 100644
index 0000000..875acad
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q12.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q12.benchmark
+# description: Run query 12 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=12
+QUERY_NUMBER_PADDED=12
diff --git a/benchmark/tpch/sf1-delta/q13.benchmark b/benchmark/tpch/sf1-delta/q13.benchmark
new file mode 100644
index 0000000..cb28472
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q13.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q13.benchmark
+# description: Run query 13 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=13
+QUERY_NUMBER_PADDED=13
diff --git a/benchmark/tpch/sf1-delta/q14.benchmark b/benchmark/tpch/sf1-delta/q14.benchmark
new file mode 100644
index 0000000..8cd85a0
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q14.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q14.benchmark
+# description: Run query 14 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=14
+QUERY_NUMBER_PADDED=14
diff --git a/benchmark/tpch/sf1-delta/q15.benchmark b/benchmark/tpch/sf1-delta/q15.benchmark
new file mode 100644
index 0000000..b18fa75
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q15.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q15.benchmark
+# description: Run query 15 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=15
+QUERY_NUMBER_PADDED=15
diff --git a/benchmark/tpch/sf1-delta/q16.benchmark b/benchmark/tpch/sf1-delta/q16.benchmark
new file mode 100644
index 0000000..9fa0790
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q16.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q16.benchmark
+# description: Run query 16 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=16
+QUERY_NUMBER_PADDED=16
diff --git a/benchmark/tpch/sf1-delta/q17.benchmark b/benchmark/tpch/sf1-delta/q17.benchmark
new file mode 100644
index 0000000..fc64954
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q17.benchmark
+# description: Run query 17 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/benchmark/tpch/sf1-delta/q18.benchmark b/benchmark/tpch/sf1-delta/q18.benchmark
new file mode 100644
index 0000000..13f17a1
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q18.benchmark
+# description: Run query 18 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/benchmark/tpch/sf1-delta/q19.benchmark b/benchmark/tpch/sf1-delta/q19.benchmark
new file mode 100644
index 0000000..88f1bba
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q19.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q19.benchmark
+# description: Run query 19 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=19
+QUERY_NUMBER_PADDED=19
diff --git a/benchmark/tpch/sf1-delta/q20.benchmark b/benchmark/tpch/sf1-delta/q20.benchmark
new file mode 100644
index 0000000..3d07a89
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q20.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q20.benchmark
+# description: Run query 20 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=20
+QUERY_NUMBER_PADDED=20
diff --git a/benchmark/tpch/sf1-delta/q21.benchmark b/benchmark/tpch/sf1-delta/q21.benchmark
new file mode 100644
index 0000000..29c0162
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q21.benchmark
+# description: Run query 21 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/benchmark/tpch/sf1-delta/q22.benchmark b/benchmark/tpch/sf1-delta/q22.benchmark
new file mode 100644
index 0000000..1d9cc53
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/q22.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q22.benchmark
+# description: Run query 22 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=22
+QUERY_NUMBER_PADDED=22
diff --git a/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
new file mode 100644
index 0000000..8b8ef40
--- /dev/null
+++ b/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in
@@ -0,0 +1,17 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [sf1]
+
+name Q${QUERY_NUMBER_PADDED}
+group tpch
+subgroup sf1
+
+require delta
+
+require parquet
+
+load benchmark/tpch/sf1-delta/load.sql
+
+run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql
+
+result duckdb/extension/tpch/dbgen/answers/sf1/q${QUERY_NUMBER_PADDED}.csv
\ No newline at end of file
diff --git a/benchmark/tpch/sf1-parquet-remote/load.sql b/benchmark/tpch/sf1-parquet-remote/load.sql
new file mode 100644
index 0000000..23c8ed2
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/load.sql
@@ -0,0 +1,8 @@
+create view customer as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/parquet/**/*.parquet');
+create view lineitem as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' ||  '/lineitem/parquet/**/*.parquet');
+create view nation as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/parquet/**/*.parquet');
+create view orders as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/orders/parquet/**/*.parquet');
+create view part as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/parquet/**/*.parquet');
+create view partsupp as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/parquet/**/*.parquet');
+create view region as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/parquet/**/*.parquet');
+create view supplier as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/parquet/**/*.parquet');
\ No newline at end of file
diff --git a/benchmark/tpch/sf1-parquet-remote/q01.benchmark b/benchmark/tpch/sf1-parquet-remote/q01.benchmark
new file mode 100644
index 0000000..bb83e6a
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q01.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q01.benchmark
+# description: Run query 01 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=1
+QUERY_NUMBER_PADDED=01
diff --git a/benchmark/tpch/sf1-parquet-remote/q02.benchmark b/benchmark/tpch/sf1-parquet-remote/q02.benchmark
new file mode 100644
index 0000000..577415f
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q02.benchmark
+# description: Run query 02 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/benchmark/tpch/sf1-parquet-remote/q03.benchmark b/benchmark/tpch/sf1-parquet-remote/q03.benchmark
new file mode 100644
index 0000000..665980c
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q03.benchmark
+# description: Run query 03 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/benchmark/tpch/sf1-parquet-remote/q04.benchmark b/benchmark/tpch/sf1-parquet-remote/q04.benchmark
new file mode 100644
index 0000000..537f15d
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q04.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q04.benchmark
+# description: Run query 04 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=4
+QUERY_NUMBER_PADDED=04
diff --git a/benchmark/tpch/sf1-parquet-remote/q05.benchmark b/benchmark/tpch/sf1-parquet-remote/q05.benchmark
new file mode 100644
index 0000000..616bc5d
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q05.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q05.benchmark
+# description: Run query 05 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=5
+QUERY_NUMBER_PADDED=05
diff --git a/benchmark/tpch/sf1-parquet-remote/q06.benchmark b/benchmark/tpch/sf1-parquet-remote/q06.benchmark
new file mode 100644
index 0000000..8a471a9
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q06.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q06.benchmark
+# description: Run query 06 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=6
+QUERY_NUMBER_PADDED=06
diff --git a/benchmark/tpch/sf1-parquet-remote/q07.benchmark b/benchmark/tpch/sf1-parquet-remote/q07.benchmark
new file mode 100644
index 0000000..02f87dd
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q07.benchmark
+# description: Run query 07 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/benchmark/tpch/sf1-parquet-remote/q08.benchmark b/benchmark/tpch/sf1-parquet-remote/q08.benchmark
new file mode 100644
index 0000000..69a8b4f
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q08.benchmark
+# description: Run query 08 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/benchmark/tpch/sf1-parquet-remote/q09.benchmark b/benchmark/tpch/sf1-parquet-remote/q09.benchmark
new file mode 100644
index 0000000..bbd0413
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q09.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q09.benchmark
+# description: Run query 09 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=9
+QUERY_NUMBER_PADDED=09
diff --git a/benchmark/tpch/sf1-parquet-remote/q10.benchmark b/benchmark/tpch/sf1-parquet-remote/q10.benchmark
new file mode 100644
index 0000000..8f0564e
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q10.benchmark
+# description: Run query 10 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/benchmark/tpch/sf1-parquet-remote/q11.benchmark b/benchmark/tpch/sf1-parquet-remote/q11.benchmark
new file mode 100644
index 0000000..61f7a01
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q11.benchmark
+# description: Run query 11 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/benchmark/tpch/sf1-parquet-remote/q12.benchmark b/benchmark/tpch/sf1-parquet-remote/q12.benchmark
new file mode 100644
index 0000000..48e2b2c
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q12.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q12.benchmark
+# description: Run query 12 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=12
+QUERY_NUMBER_PADDED=12
diff --git a/benchmark/tpch/sf1-parquet-remote/q13.benchmark b/benchmark/tpch/sf1-parquet-remote/q13.benchmark
new file mode 100644
index 0000000..f5b2ee3
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q13.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q13.benchmark
+# description: Run query 13 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=13
+QUERY_NUMBER_PADDED=13
diff --git a/benchmark/tpch/sf1-parquet-remote/q14.benchmark b/benchmark/tpch/sf1-parquet-remote/q14.benchmark
new file mode 100644
index 0000000..2ddcaa5
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q14.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q14.benchmark
+# description: Run query 14 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=14
+QUERY_NUMBER_PADDED=14
diff --git a/benchmark/tpch/sf1-parquet-remote/q15.benchmark b/benchmark/tpch/sf1-parquet-remote/q15.benchmark
new file mode 100644
index 0000000..f7414fd
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q15.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q15.benchmark
+# description: Run query 15 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=15
+QUERY_NUMBER_PADDED=15
diff --git a/benchmark/tpch/sf1-parquet-remote/q16.benchmark b/benchmark/tpch/sf1-parquet-remote/q16.benchmark
new file mode 100644
index 0000000..4375e61
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q16.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q16.benchmark
+# description: Run query 16 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=16
+QUERY_NUMBER_PADDED=16
diff --git a/benchmark/tpch/sf1-parquet-remote/q17.benchmark b/benchmark/tpch/sf1-parquet-remote/q17.benchmark
new file mode 100644
index 0000000..df38847
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q17.benchmark
+# description: Run query 17 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/benchmark/tpch/sf1-parquet-remote/q18.benchmark b/benchmark/tpch/sf1-parquet-remote/q18.benchmark
new file mode 100644
index 0000000..d01d822
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q18.benchmark
+# description: Run query 18 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/benchmark/tpch/sf1-parquet-remote/q19.benchmark b/benchmark/tpch/sf1-parquet-remote/q19.benchmark
new file mode 100644
index 0000000..7a5a33e
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q19.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q19.benchmark
+# description: Run query 19 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=19
+QUERY_NUMBER_PADDED=19
diff --git a/benchmark/tpch/sf1-parquet-remote/q20.benchmark b/benchmark/tpch/sf1-parquet-remote/q20.benchmark
new file mode 100644
index 0000000..e1ff9d0
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q20.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q20.benchmark
+# description: Run query 20 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=20
+QUERY_NUMBER_PADDED=20
diff --git a/benchmark/tpch/sf1-parquet-remote/q21.benchmark b/benchmark/tpch/sf1-parquet-remote/q21.benchmark
new file mode 100644
index 0000000..af179e8
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q21.benchmark
+# description: Run query 21 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/benchmark/tpch/sf1-parquet-remote/q22.benchmark b/benchmark/tpch/sf1-parquet-remote/q22.benchmark
new file mode 100644
index 0000000..527adcf
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/q22.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch/sf1-delta/q22.benchmark
+# description: Run query 22 from the TPC-H benchmark
+# group: [sf1-parquet]
+
+template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
+QUERY_NUMBER=22
+QUERY_NUMBER_PADDED=22
diff --git a/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
new file mode 100644
index 0000000..dbc4e0a
--- /dev/null
+++ b/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in
@@ -0,0 +1,19 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [sf1]
+
+name Q${QUERY_NUMBER_PADDED}
+group tpch
+subgroup sf1
+
+require delta
+
+require parquet
+
+require httpfs
+
+load benchmark/tpch/sf1-parquet-remote/load.sql
+
+run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql
+
+result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv
\ No newline at end of file
diff --git a/scripts/plot.py b/scripts/plot.py
new file mode 100644
index 0000000..c5ea201
--- /dev/null
+++ b/scripts/plot.py
@@ -0,0 +1,27 @@
+import duckdb
+
+### Parse Query Results
+parse_benchmark_result_query = """
+SELECT 
+    parse_filename(name, true) as benchmark, 
+    parse_filename(filename, true) as config, 
+    avg(timing) as timing
+FROM
+    read_csv('benchmark_results/*.csv', filename=1) 
+GROUP BY 
+    config, 
+    benchmark 
+ORDER BY 
+    config, 
+    benchmark
+"""
+
+benchmark_results = duckdb.execute(parse_benchmark_result_query).df()
+
+### Plot graph
+import matplotlib.pyplot as plt
+import numpy as np
+
+plt.rcParams["figure.figsize"] = [10, 5]
+fig = benchmark_results.pivot(index='benchmark', columns='config', values='timing').plot(kind='bar', title='', ylabel='runtime [s]').get_figure()
+fig.savefig('benchmark_results/result.png')
\ No newline at end of file

From 70aec0c524b5d426b983e190ed45d9fb542aa4af Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 24 Jul 2024 11:02:35 +0200
Subject: [PATCH 36/38] small tweaks to benchmark readme and makefile

---
 benchmark/README.md          | 16 +++++++++++++++-
 benchmark/benchmark.Makefile |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 6ff3801..edc0497 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -8,6 +8,11 @@ To run the benchmarks, firstly run the build using:
 BUILD_BENCHMARK=1 make
 ```
 
+Then, make sure that the generated data is created using:
+```shell
+make generate-data
+```
+
 Then to run a benchmark, use one of the benchmark Makefile targets prefixed with `bench-run-`:
 ```shell
 make bench-run-tpch-sf1
@@ -19,9 +24,18 @@ To create a plot from the results run:
 make plot
 ```
 
-## Configurations options
+## More options
 Specific benchmarks can be run from a suite using the `BENCHMARK_PATTERN` variable. For example to compare
 only Q01 from TPCH SF1, run:
 ```shell
 BENCHMARK_PATTERN=q01.benchmark make bench-run-tpch-sf1
+```
+
+Also, we can run all local benchmarks using:
+```shell
+make  bench-run-all-local
+```
+Or all remote benchmarks using
+```shell
+make bench-run-all-remote
 ```
\ No newline at end of file
diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile
index 3b88eb7..322d9eb 100644
--- a/benchmark/benchmark.Makefile
+++ b/benchmark/benchmark.Makefile
@@ -63,3 +63,4 @@ bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet
 ###
 bench-run-all-local: bench-run-tpcds-sf1 bench-run-tpch-sf1
 
+bench-run-all-remote: bench-run-tpch-sf1-remote

From 628c5ad73ff3c430c9972526fad05930a56e5af4 Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 24 Jul 2024 14:06:52 +0200
Subject: [PATCH 37/38] fix accidentally borking `make`

---
 Makefile                     | 6 +++---
 benchmark/benchmark.Makefile | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index defa0b6..2ff6867 100644
--- a/Makefile
+++ b/Makefile
@@ -11,12 +11,12 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t
 test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data
 test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat
 
-# Include the Makefile from the benchmark directory
-include benchmark/benchmark.Makefile
-
 # Include the Makefile from extension-ci-tools
 include extension-ci-tools/makefiles/duckdb_extension.Makefile
 
+# Include the Makefile from the benchmark directory
+include benchmark/benchmark.Makefile
+
 # Generate some test data to test with
 generate-data:
 	python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta
diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile
index 322d9eb..2852b03 100644
--- a/benchmark/benchmark.Makefile
+++ b/benchmark/benchmark.Makefile
@@ -1,3 +1,5 @@
+.PHONY: bench-output-dir clean_benchmark plot
+
 # Set this flag during building to enable the benchmark runner
 ifeq (${BUILD_BENCHMARK}, 1)
 	TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1

From cfdba36d098c66fab2cf647aa2f7ac6be86b33db Mon Sep 17 00:00:00 2001
From: Sam Ansmink <samansmink@hotmail.com>
Date: Wed, 24 Jul 2024 17:31:34 +0200
Subject: [PATCH 38/38] small fix to build_benchmark

---
 Makefile                     |  5 +++++
 benchmark/benchmark.Makefile | 28 +++++++++++++++-------------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/Makefile b/Makefile
index 2ff6867..add6fe5 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,11 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t
 test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data
 test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat
 
+# Set this flag during building to enable the benchmark runner
+ifeq (${BUILD_BENCHMARK}, 1)
+	TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1
+endif
+
 # Include the Makefile from extension-ci-tools
 include extension-ci-tools/makefiles/duckdb_extension.Makefile
 
diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile
index 2852b03..b3f4202 100644
--- a/benchmark/benchmark.Makefile
+++ b/benchmark/benchmark.Makefile
@@ -1,12 +1,7 @@
 .PHONY: bench-output-dir clean_benchmark plot
 
-# Set this flag during building to enable the benchmark runner
-ifeq (${BUILD_BENCHMARK}, 1)
-	TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1
-endif
-
-ifeq ("${BENCHMARK_PATTERN}", "")
-	BENCHMARK_PATTERN:=.*
+ifeq ("$(BENCHMARK_PATTERN)a", "a")
+    BENCHMARK_PATTERN:=.*
 endif
 
 bench-output-dir:
@@ -27,10 +22,13 @@ plot:
 
 # TPCH SF1 on delta table
 bench-run-tpch-sf1-delta: bench-output-dir
-	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-delta.csv
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-delta.csv
 # TPCH SF1 on parquet files
 bench-run-tpch-sf1-parquet: bench-output-dir
-	./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-parquet.csv
+	./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-parquet.csv
+# TPCH SF1 on duckdb file
+bench-run-tpch-sf1-duckdb: bench-output-dir
+	./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-duckdb.csv
 # COMPARES TPCH SF1 on parquet file vs on delta files
 bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet
 
@@ -40,10 +38,10 @@ bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet
 
 # TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1)
 bench-run-tpch-sf1-remote-delta: bench-output-dir
-	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-delta.csv
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-remote-delta.csv
 # TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1)
 bench-run-tpch-sf1-remote-parquet: bench-output-dir
-	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/${BENCHMARK_PATTERN}'  &> benchmark_results/tpch-sf1-remote-parquet.csv
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/$(BENCHMARK_PATTERN)'  &> benchmark_results/tpch-sf1-remote-parquet.csv
 # COMPARES TPCH SF1 on parquet file vs on delta files
 bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta
 
@@ -53,10 +51,14 @@ bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-
 
 # TPCDS SF1 on delta table
 bench-run-tpcds-sf1-delta: bench-output-dir
-	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-delta.csv
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-delta.csv
 # TPCDS SF1 on parquet files
 bench-run-tpcds-sf1-parquet: bench-output-dir
-	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-parquet.csv
+	./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-parquet.csv
+# TPCDS SF1 on duckdb files
+bench-run-tpcds-sf1-duckdb: bench-output-dir
+	./build/release/benchmark/benchmark_runner 'benchmark/tpcds/sf1/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-duckdb.csv
+
 # COMPARES TPCDS SF1 on parquet file vs on delta files
 bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet