From ab618851c24c6e82333168de09419f103f0fd8ee Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 10 Jun 2024 16:01:21 +0200 Subject: [PATCH 01/38] bump delta kernel --- CMakeLists.txt | 2 +- src/delta_utils.cpp | 4 ++-- test/sql/dat/basic_append.test | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8d12ec..a37370c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,7 @@ endif() ExternalProject_Add( ${KERNEL_NAME} GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" - GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8 + GIT_TAG 823367e4dc13b627914412ee2ca7933a1c7b822a CONFIGURE_COMMAND "" UPDATE_COMMAND "" BUILD_IN_SOURCE 1 diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index b02e898..7f1d3a7 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -226,7 +226,7 @@ static unordered_map PrunePredicates(unordered_mapcolumn_filters); + auto filters = predicate->column_filters; auto it = filters.begin(); auto end = filters.end(); @@ -312,7 +312,7 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte case TableFilterType::CONJUNCTION_AND: return VisitAndFilter(col_name, static_cast(filter), state); default: - throw NotImplementedException("Attempted to push down unimplemented filter type: '%s'", EnumUtil::ToString(filter.filter_type)); + return ~0; } } diff --git a/test/sql/dat/basic_append.test b/test/sql/dat/basic_append.test index 4ff31bc..87930b8 100644 --- a/test/sql/dat/basic_append.test +++ b/test/sql/dat/basic_append.test @@ -56,9 +56,6 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta') 2 3 -# TODO: Figure out what's wrong here -mode skip - # Now we add a filter that filters out one of the files query II SELECT letter, number @@ -67,8 +64,6 @@ WHERE number < 2 ---- a 1 -mode unskip - # Now we add a filter that filters out the other file query III SELECT a_float, letter, number, @@ -77,8 +72,6 @@ WHERE 
number > 4 ---- 5.5 e 5 -mode skip - # Now we add a filter that filters out all columns query III SELECT a_float, number, letter From a6f85ef00572de92836b7b9ece90da8affaf25b1 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 10 Jun 2024 16:02:23 +0200 Subject: [PATCH 02/38] set correct duckdb version for submodule --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 7b8efd3..1f98600 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 7b8efd3d0fab38ec9dae467861a317af3f1d7f3e +Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc From ef1dd70dee920175a559f69fc3ffa388b778c9ea Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 13 Jun 2024 11:33:00 +0200 Subject: [PATCH 03/38] bump delta to c901665b98b --- CMakeLists.txt | 2 +- Makefile | 5 ----- extension-ci-tools | 2 +- scripts/generate_test_data.py | 8 +++++++- src/delta_utils.cpp | 36 ++++++++++++++++++++++++++++++++--- src/functions/delta_scan.cpp | 2 ++ src/include/delta_utils.hpp | 5 +++++ 7 files changed, 49 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a37370c..28ea1d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,7 @@ endif() ExternalProject_Add( ${KERNEL_NAME} GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" - GIT_TAG 823367e4dc13b627914412ee2ca7933a1c7b822a + GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281 CONFIGURE_COMMAND "" UPDATE_COMMAND "" BUILD_IN_SOURCE 1 diff --git a/Makefile b/Makefile index 05db957..78144e6 100644 --- a/Makefile +++ b/Makefile @@ -14,11 +14,6 @@ test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile -reldebug: - mkdir -p build/reldebug && \ - cmake $(GENERATOR) $(BUILD_FLAGS) $(EXT_RELEASE_FLAGS) -DCMAKE_BUILD_TYPE=RelWithDebInfo -S ./duckdb/ -B build/reldebug && \ - cmake --build build/reldebug --config 
RelWithDebInfo - # Generate some test data to test with generate-data: python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta diff --git a/extension-ci-tools b/extension-ci-tools index 71b8a60..c0cc931 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f +Subproject commit c0cc9319492bfa38344c2f28bd35f2304c74cdde diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index 715e882..cb1d2f7 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -136,8 +136,14 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: generate_test_data_pyspark(f"tpch_sf0_01_{table}", f'tpch_sf0_01/{table}', f'{TMP_PATH}/tpch_sf0_01_export/{table}.parquet') +## TPCH SF1 full dataset +con = duckdb.connect() +con.query(f"call dbgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpch_sf1_export' (FORMAT parquet)") +for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: + generate_test_data_pyspark(f"tpch_sf1_{table}", f'tpch_sf1/{table}', f'{TMP_PATH}/tpch_sf1_export/{table}.parquet') + ## TPCDS SF0.01 full dataset con = duckdb.connect() con.query(f"call dsdgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpcds_sf0_01_export' (FORMAT parquet)") for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: - generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet') + generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', 
f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet') \ No newline at end of file diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 7f1d3a7..a805d15 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -3,6 +3,7 @@ #include "duckdb.hpp" #include "duckdb/main/extension_util.hpp" #include +#include namespace duckdb { @@ -257,8 +258,24 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co case LogicalType::BIGINT: right = visit_expression_literal_long(state, BigIntValue::Get(value)); break; - - + // case LogicalType::INTEGER: + // right = visit_expression_literal_int(state, IntegerValue::Get(value)); + // break; + // case LogicalType::SMALLINT: + // right = visit_expression_literal_short(state, SmallIntValue::Get(value)); + // break; + // case LogicalType::TINYINT: + // right = visit_expression_literal_byte(state, TinyIntValue::Get(value)); + // break; + // case LogicalType::FLOAT: + // right = visit_expression_literal_float(state, FloatValue::Get(value)); + // break; + // case LogicalType::DOUBLE: + // right = visit_expression_literal_double(state, DoubleValue::Get(value)); + // break; + // case LogicalType::BOOLEAN: + // right = visit_expression_literal_bool(state, BooleanValue::Get(value)); + // break; case LogicalType::VARCHAR: { // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str()) auto str = StringValue::Get(value); @@ -266,7 +283,6 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string"); break; } - default: break; // unsupported type } @@ -305,12 +321,26 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc return visit_expression_and(state, &eit); } +uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) { + auto maybe_left = 
ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError); + uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitIsNull failed to visit_expression_column"); + return ffi::visit_expression_is_null(state, left); +} + +uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) { + return ffi::visit_expression_not(state, VisitIsNull(col_name, state)); +} + uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state) { switch (filter.filter_type) { case TableFilterType::CONSTANT_COMPARISON: return VisitConstantFilter(col_name, static_cast(filter), state); case TableFilterType::CONJUNCTION_AND: return VisitAndFilter(col_name, static_cast(filter), state); + // case TableFilterType::IS_NULL: + // return VisitIsNull(col_name, state); + // case TableFilterType::IS_NOT_NULL: + // return VisitIsNotNull(col_name, state); default: return ~0; } diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index dd2a027..d4320e5 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -31,6 +31,8 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel StringUtil::RTrim(path_string, "/"); path_string += "/" + KernelUtils::FromDeltaString(path); + printf("Got File %s\n", path_string.c_str()); + // First we append the file to our resolved files context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string)); context->metadata.emplace_back(make_uniq()); diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index bcb5f74..37dc289 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -5,6 +5,7 @@ #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/common/enum_util.hpp" #include +#include // TODO: clean up this file as we go @@ -140,6 +141,10 @@ class PredicateVisitor : public 
ffi::EnginePredicate { uintptr_t VisitConstantFilter(const string &col_name, const ConstantFilter &filter, ffi::KernelExpressionVisitorState* state); uintptr_t VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter, ffi::KernelExpressionVisitorState* state); + + uintptr_t VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState* state); + uintptr_t VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState* state); + uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState* state); }; From d6ec36945701e293b480d901255673ae451e1f89 Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Thu, 13 Jun 2024 20:28:23 +0200 Subject: [PATCH 04/38] azure support --- src/functions/delta_scan.cpp | 119 +++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 25 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index dd2a027..41c38cf 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -69,27 +69,43 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p ffi::EngineBuilder* builder; // For "regular" paths we early out with the default builder config - if (!StringUtil::StartsWith(path, "s3://")) { + if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://")) { auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); } - auto end_of_container = path.find('/',5); + string bucket; + string path_in_bucket; + string secret_type; - if(end_of_container == string::npos) { - throw IOException("Invalid s3 url passed to delta scan: %s", path); + if (StringUtil::StartsWith(path, "s3://")) { + auto end_of_container = path.find('/',5); + + if(end_of_container == string::npos) { + throw 
IOException("Invalid s3 url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + path_in_bucket = path.substr(end_of_container); + secret_type = "s3"; + } else if (StringUtil::StartsWith(path, "azure://")) { + auto end_of_container = path.find('/',8); + + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(8, end_of_container-8); + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; } - auto bucket = path.substr(5, end_of_container-5); - auto path_in_bucket = path.substr(end_of_container); auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); - // For S3 paths we need to trim the url, set the container, and fetch a potential secret + // For S3 or Azure paths we need to trim the url, set the container, and fetch a potential secret auto &secret_manager = SecretManager::Get(context); auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context); - auto secret_match = secret_manager.LookupSecret(transaction, path, "s3"); + auto secret_match = secret_manager.LookupSecret(transaction, path, secret_type); // No secret: nothing left to do here! 
if (!secret_match.HasMatch()) { @@ -97,26 +113,79 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p } const auto &kv_secret = dynamic_cast(*secret_match.secret_entry->secret); - auto key_id = kv_secret.TryGetValue("key_id").ToString(); - auto secret = kv_secret.TryGetValue("secret").ToString(); - auto session_token = kv_secret.TryGetValue("session_token").ToString(); - auto region = kv_secret.TryGetValue("region").ToString(); - if (key_id.empty() && secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true")); - } + // Here you would need to add the logic for setting the builder options for Azure + // This is just a placeholder and will need to be replaced with the actual logic + if (secret_type == "s3") { + auto key_id = kv_secret.TryGetValue("key_id").ToString(); + auto secret = kv_secret.TryGetValue("secret").ToString(); + auto session_token = kv_secret.TryGetValue("session_token").ToString(); + auto region = kv_secret.TryGetValue("region").ToString(); - if (!key_id.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id)); - } - if (!secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret)); - } - if (!session_token.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token)); - } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); + if (key_id.empty() && secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true")); + } + + if (!key_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id)); + } + if (!secret.empty()) 
{ + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret)); + } + if (!session_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token)); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); + + } else if (secret_type == "azure") { + + auto connection_string = kv_secret.TryGetValue("connection_string").ToString(); + auto account_name = kv_secret.TryGetValue("account_name").ToString(); + auto account_key = kv_secret.TryGetValue("account_key").ToString(); + auto client_id = kv_secret.TryGetValue("client_id").ToString(); + auto client_secret = kv_secret.TryGetValue("client_secret").ToString(); + auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString(); + auto azure_client_certificate_path = kv_secret.TryGetValue("certificate_path").ToString(); + auto sas_token = kv_secret.TryGetValue("sas_token").ToString(); + auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString(); + auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString(); + auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString(); + + if (!connection_string.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); + } + if (!account_name.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name)); + } + if (!account_key.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_key"), KernelUtils::ToDeltaString(account_key)); + } + if (!client_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); + } + if (!client_secret.empty()) { + 
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); + } + if (!tenant_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); + } + if (!azure_client_certificate_path.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(azure_client_certificate_path)); + } + if (!sas_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_sas_token"), KernelUtils::ToDeltaString(sas_token)); + } + if (!http_proxy.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); + } + if (!proxy_user_name.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); + } + if (!proxy_password.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); + } + } return builder; } From a2ddb6c7d65cbae8e7466a4d35c315b2c34b8799 Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Thu, 13 Jun 2024 22:02:59 +0200 Subject: [PATCH 05/38] azure test impl --- src/functions/delta_scan.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 41c38cf..1b7d894 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -69,7 +69,7 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p ffi::EngineBuilder* builder; // For "regular" paths we early out with the default builder config - if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://")) { + if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && 
!StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfss://")) { auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); } @@ -90,6 +90,24 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p } else if (StringUtil::StartsWith(path, "azure://")) { auto end_of_container = path.find('/',8); + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(8, end_of_container-8); + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; + } else if (StringUtil::StartsWith(path, "az://")) { + auto end_of_container = path.find('/',5); + + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; + } else if (StringUtil::StartsWith(path, "abfss://")) { + auto end_of_container = path.find('/',8); + if(end_of_container == string::npos) { throw IOException("Invalid azure url passed to delta scan: %s", path); } From efd4db01aeb81dec4900d9957a67360eb83fbd18 Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Fri, 14 Jun 2024 11:11:15 +0200 Subject: [PATCH 06/38] update azure values for azure extension --- src/functions/delta_scan.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 1b7d894..05b958e 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -159,12 +159,12 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p auto connection_string = kv_secret.TryGetValue("connection_string").ToString(); auto account_name = 
kv_secret.TryGetValue("account_name").ToString(); - auto account_key = kv_secret.TryGetValue("account_key").ToString(); + auto endpoint = kv_secret.TryGetValue("endpoint").ToString(); + auto credential_chain = kv_secret.TryGetValue("credential_chain").ToString(); auto client_id = kv_secret.TryGetValue("client_id").ToString(); auto client_secret = kv_secret.TryGetValue("client_secret").ToString(); auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString(); - auto azure_client_certificate_path = kv_secret.TryGetValue("certificate_path").ToString(); - auto sas_token = kv_secret.TryGetValue("sas_token").ToString(); + auto certificate_path = kv_secret.TryGetValue("certificate_path").ToString(); auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString(); auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString(); auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString(); @@ -175,8 +175,11 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p if (!account_name.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name)); } - if (!account_key.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_key"), KernelUtils::ToDeltaString(account_key)); + if (!endpoint.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint)); + } + if (!credential_chain.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(credential_chain)); } if (!client_id.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); @@ -187,11 +190,8 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p if (!tenant_id.empty()) { ffi::set_builder_option(builder, 
KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); } - if (!azure_client_certificate_path.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(azure_client_certificate_path)); - } - if (!sas_token.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_sas_token"), KernelUtils::ToDeltaString(sas_token)); + if (!certificate_path.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); } if (!http_proxy.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); From 1563715a79c5b17db469ca233f28a30cd080ef4f Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Sat, 15 Jun 2024 10:54:26 +0200 Subject: [PATCH 07/38] bump delta to 181232a45562, enable cardinalty estimation, fix varchar pushdown --- CMakeLists.txt | 4 +-- scripts/generate_test_data.py | 3 +- src/delta_utils.cpp | 57 +++++++++++++++++++---------------- src/functions/delta_scan.cpp | 3 -- 4 files changed, 35 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28ea1d2..58e3d39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,8 +59,8 @@ endif() # Add rust_example as a CMake target ExternalProject_Add( ${KERNEL_NAME} - GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" - GIT_TAG c901665b98b2fed5ff1c713a9666eba9d16ea281 + GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs" + GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c CONFIGURE_COMMAND "" UPDATE_COMMAND "" BUILD_IN_SOURCE 1 diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index cb1d2f7..e7bf588 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -78,7 +78,8 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate ## 
CREATE ## CONFIGURE USAGE OF DELETION VECTORS - spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);") + if (delete_predicate): + spark.sql(f"ALTER TABLE test_table_{name} SET TBLPROPERTIES ('delta.enableDeletionVectors' = true);") ## ADDING DELETES deltaTable = DeltaTable.forPath(spark, delta_table_path) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index a805d15..104d65a 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -200,6 +200,10 @@ static bool CanHandleFilter(TableFilter *filter) { switch (filter->filter_type) { case TableFilterType::CONSTANT_COMPARISON: return true; + case TableFilterType::IS_NULL: + return true; + case TableFilterType::IS_NOT_NULL: + return true; case TableFilterType::CONJUNCTION_AND: { auto &conjunction = static_cast(*filter); bool can_handle = true; @@ -258,28 +262,28 @@ uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const Co case LogicalType::BIGINT: right = visit_expression_literal_long(state, BigIntValue::Get(value)); break; - // case LogicalType::INTEGER: - // right = visit_expression_literal_int(state, IntegerValue::Get(value)); - // break; - // case LogicalType::SMALLINT: - // right = visit_expression_literal_short(state, SmallIntValue::Get(value)); - // break; - // case LogicalType::TINYINT: - // right = visit_expression_literal_byte(state, TinyIntValue::Get(value)); - // break; - // case LogicalType::FLOAT: - // right = visit_expression_literal_float(state, FloatValue::Get(value)); - // break; - // case LogicalType::DOUBLE: - // right = visit_expression_literal_double(state, DoubleValue::Get(value)); - // break; - // case LogicalType::BOOLEAN: - // right = visit_expression_literal_bool(state, BooleanValue::Get(value)); - // break; + case LogicalType::INTEGER: + right = visit_expression_literal_int(state, IntegerValue::Get(value)); + break; + case LogicalType::SMALLINT: + right = visit_expression_literal_short(state, 
SmallIntValue::Get(value)); + break; + case LogicalType::TINYINT: + right = visit_expression_literal_byte(state, TinyIntValue::Get(value)); + break; + case LogicalType::FLOAT: + right = visit_expression_literal_float(state, FloatValue::Get(value)); + break; + case LogicalType::DOUBLE: + right = visit_expression_literal_double(state, DoubleValue::Get(value)); + break; + case LogicalType::BOOLEAN: + right = visit_expression_literal_bool(state, BooleanValue::Get(value)); + break; case LogicalType::VARCHAR: { // WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str()) auto str = StringValue::Get(value); - auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError); + auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str), DuckDBEngineError::AllocateError); right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string"); break; } @@ -315,6 +319,7 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc return 0; } auto &child_filter = *it++; + return VisitFilter(col_name, *child_filter, state); }; auto eit = EngineIteratorFromCallable(get_next); @@ -322,9 +327,9 @@ uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const Conjunc } uintptr_t PredicateVisitor::VisitIsNull(const string &col_name, ffi::KernelExpressionVisitorState *state) { - auto maybe_left = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError); - uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitIsNull failed to visit_expression_column"); - return ffi::visit_expression_is_null(state, left); + auto maybe_inner = ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError); + uintptr_t inner = KernelUtils::UnpackResult(maybe_inner, "VisitIsNull failed 
to visit_expression_column"); + return ffi::visit_expression_is_null(state, inner); } uintptr_t PredicateVisitor::VisitIsNotNull(const string &col_name, ffi::KernelExpressionVisitorState *state) { @@ -337,10 +342,10 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte return VisitConstantFilter(col_name, static_cast(filter), state); case TableFilterType::CONJUNCTION_AND: return VisitAndFilter(col_name, static_cast(filter), state); - // case TableFilterType::IS_NULL: - // return VisitIsNull(col_name, state); - // case TableFilterType::IS_NOT_NULL: - // return VisitIsNotNull(col_name, state); + case TableFilterType::IS_NULL: + return VisitIsNull(col_name, state); + case TableFilterType::IS_NOT_NULL: + return VisitIsNotNull(col_name, state); default: return ~0; } diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index d4320e5..ed968a2 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -31,8 +31,6 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel StringUtil::RTrim(path_string, "/"); path_string += "/" + KernelUtils::FromDeltaString(path); - printf("Got File %s\n", path_string.c_str()); - // First we append the file to our resolved files context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string)); context->metadata.emplace_back(make_uniq()); @@ -589,7 +587,6 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance function.deserialize = nullptr; function.statistics = nullptr; function.table_scan_progress = nullptr; - function.cardinality = nullptr; function.get_bind_info = nullptr; // Schema param is just confusing here From 7291aa51e970107095eefd25936e6dfc547d7610 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Sat, 15 Jun 2024 13:04:53 +0200 Subject: [PATCH 08/38] add tests for pushdown all types --- scripts/generate_test_data.py | 5 +++ .../generated/file_skipping_all_types.test | 44 +++++++++++++++++++ 2 
files changed, 49 insertions(+) create mode 100644 test/sql/generated/file_skipping_all_types.test diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index e7bf588..eaf9d30 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -116,6 +116,11 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);" generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part"); +## Partitioned table with all types we can file skip on +for type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]: + query = f"CREATE table test_table as select i::{type} as value, i::{type} as part from range(0,2) tbl(i)" + generate_test_data_delta_rs(f"test_file_skipping/{type}", query, "part"); + ## Simple table with deletion vector con = duckdb.connect() con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'") diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test new file mode 100644 index 0000000..e4348e8 --- /dev/null +++ b/test/sql/generated/file_skipping_all_types.test @@ -0,0 +1,44 @@ +# name: test/sql/generated/file_skipping_all_types.test +# description: Test filter pushdown succeeds on all file types we can push down +# group: [delta_generated] + +require parquet + +require delta + +require-env GENERATED_DATA_AVAILABLE + +# TODO: this doesn't appear to skip files yet +# TODO: add tests once https://github.com/duckdb/duckdb/pull/12488 is available + +query I +select value +from delta_scan('./data/generated/test_file_skipping/bool/delta_lake') +where part != false +order by value +---- +true + +foreach type bool int tinyint smallint bigint varchar + +query I +select value +from 
delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +where part != 0 +order by value +---- +1 + +endloop + +foreach type float double + +query I +select value +from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +where part > 0.5 +order by value +---- +1.0 + +endloop From 638292f1888f0e5911a65aaa34ac0163bbde1f36 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 17 Jun 2024 14:49:35 +0200 Subject: [PATCH 09/38] add linux arm64 builds --- .../workflows/MainDistributionPipeline.yml | 4 +-- .github/workflows/_extension_distribution.yml | 8 ++++- CMakeLists.txt | 29 ++++++++++++++++--- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index d0d7916..40ff65e 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -18,7 +18,7 @@ jobs: with: duckdb_version: v1.0.0 extension_name: delta - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' duckdb-stable-deploy: name: Deploy extension binaries @@ -28,5 +28,5 @@ jobs: with: extension_name: delta duckdb_version: v1.0.0 - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} \ No newline at end of file diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml index b20bbb8..f38506e 100644 --- a/.github/workflows/_extension_distribution.yml +++ b/.github/workflows/_extension_distribution.yml @@ -140,9 +140,15 @@ jobs: ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl - name: Setup Rust - if: ${{ 
matrix.duckdb_arch == 'linux_amd64' }} + if: ${{ matrix.duckdb_arch == 'linux_amd64'}} uses: dtolnay/rust-toolchain@stable + - name: Setup Rust for cross compilation + if: ${{ matrix.duckdb_arch == 'linux_arm64'}} + uses: dtolnay/rust-toolchain@stable + with: + targets: aarch64-unknown-linux-gnu + - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index f8d12ec..3000a81 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,11 +35,19 @@ set(KERNEL_NAME delta_kernel) # Set default ExternalProject root directory set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust) +set(RUST_ENV_VARS "") + # Propagate arch to rust build for CI set(RUST_PLATFORM_TARGET "") if("${OS_NAME}" STREQUAL "linux") if ("${OS_ARCH}" STREQUAL "arm64") set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") + elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") + set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu) + set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) + set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib) + set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include) + set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") else() set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") endif() @@ -56,20 +64,33 @@ elseif("${OS_NAME}" STREQUAL "osx") endif() endif() +string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS) + +# Having these set will mess up cross compilation to linux arm +set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD) + # Add rust_example as a CMake target ExternalProject_Add( ${KERNEL_NAME} GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" GIT_TAG 
08f0764a00e89f42136fd478823d28278adc7ee8 - CONFIGURE_COMMAND "" + # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them + # through CMake is an error-prone mess + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env UPDATE_COMMAND "" BUILD_IN_SOURCE 1 # Build debug build - BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET} + BUILD_COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET} # Build release build - COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET} + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET} # Build DATs - COMMAND cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a" BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a" BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h" From aa60e1d8a7411b890f0ee914f59b29a0d15f8f6d Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 17 Jun 2024 14:49:35 +0200 Subject: [PATCH 10/38] add linux arm64 builds --- .../workflows/MainDistributionPipeline.yml | 4 +-- .github/workflows/_extension_distribution.yml | 8 ++++- CMakeLists.txt | 29 ++++++++++++++++--- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git 
a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index d0d7916..40ff65e 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -18,7 +18,7 @@ jobs: with: duckdb_version: v1.0.0 extension_name: delta - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' duckdb-stable-deploy: name: Deploy extension binaries @@ -28,5 +28,5 @@ jobs: with: extension_name: delta duckdb_version: v1.0.0 - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64;linux_arm64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} \ No newline at end of file diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml index b20bbb8..f38506e 100644 --- a/.github/workflows/_extension_distribution.yml +++ b/.github/workflows/_extension_distribution.yml @@ -140,9 +140,15 @@ jobs: ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl - name: Setup Rust - if: ${{ matrix.duckdb_arch == 'linux_amd64' }} + if: ${{ matrix.duckdb_arch == 'linux_amd64'}} uses: dtolnay/rust-toolchain@stable + - name: Setup Rust for cross compilation + if: ${{ matrix.duckdb_arch == 'linux_arm64'}} + uses: dtolnay/rust-toolchain@stable + with: + targets: aarch64-unknown-linux-gnu + - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 58e3d39..5484048 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,11 +35,19 @@ set(KERNEL_NAME delta_kernel) # Set default ExternalProject root directory 
set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust) +set(RUST_ENV_VARS "") + # Propagate arch to rust build for CI set(RUST_PLATFORM_TARGET "") if("${OS_NAME}" STREQUAL "linux") if ("${OS_ARCH}" STREQUAL "arm64") set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") + elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") + set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu) + set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) + set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib) + set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include) + set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") else() set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") endif() @@ -56,20 +64,33 @@ elseif("${OS_NAME}" STREQUAL "osx") endif() endif() +string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS) + +# Having these set will mess up cross compilation to linux arm +set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD) + # Add rust_example as a CMake target ExternalProject_Add( ${KERNEL_NAME} GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs" GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c - CONFIGURE_COMMAND "" + # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them + # through CMake is an error-prone mess + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env UPDATE_COMMAND "" BUILD_IN_SOURCE 1 # Build debug build - BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET} + BUILD_COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET} # Build release build - COMMAND 
cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET} + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET} # Build DATs - COMMAND cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} + cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a" BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a" BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h" From deff1b967ae722522f03efe183c239928abfd274 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 19 Jun 2024 22:49:29 +0200 Subject: [PATCH 11/38] added windows archs, making things ugly in the process --- .../workflows/MainDistributionPipeline.yml | 4 +- .github/workflows/_extension_distribution.yml | 18 +- CMakeLists.txt | 51 +- src/delta_utils.cpp | 10 +- src/include/delta_kernel_ffi.hpp | 537 ++++++++++++++++++ 5 files changed, 591 insertions(+), 29 deletions(-) create mode 100644 src/include/delta_kernel_ffi.hpp diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 40ff65e..abdc095 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -18,7 +18,7 @@ jobs: with: duckdb_version: v1.0.0 extension_name: delta - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' duckdb-stable-deploy: name: Deploy extension binaries @@ -28,5 +28,5 @@ jobs: 
with: extension_name: delta duckdb_version: v1.0.0 - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} \ No newline at end of file diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml index f38506e..a536982 100644 --- a/.github/workflows/_extension_distribution.yml +++ b/.github/workflows/_extension_distribution.yml @@ -317,6 +317,9 @@ jobs: with: python-version: '3.11' + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + - uses: r-lib/actions/setup-r@v2 if: matrix.duckdb_arch == 'windows_amd64_rtools' with: @@ -340,16 +343,6 @@ jobs: with: vcpkgGitCommitId: ${{ inputs.vcpkg_commit }} - - name: Fix for MSVC issue - shell: bash - env: - OVERLAY_TRIPLET_SRC: ${{ github.workspace }}/vcpkg/triplets/community/x64-windows-static-md.cmake - OVERLAY_TRIPLET_DST: ${{ github.workspace }}/overlay_triplets/x64-windows-static-md.cmake - run: | - mkdir overlay_triplets - cp $OVERLAY_TRIPLET_SRC $OVERLAY_TRIPLET_DST - echo "set(VCPKG_PLATFORM_TOOLSET_VERSION "14.38")" >> $OVERLAY_TRIPLET_DST - - name: Build & test extension env: VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets" @@ -357,6 +350,11 @@ jobs: run: | make test_release + - name: Error log + if: always() + run: | + cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log + - uses: actions/upload-artifact@v2 with: name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}} diff --git a/CMakeLists.txt b/CMakeLists.txt index 3000a81..41ace4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ if(APPLE) elseif(UNIX) set(PLATFORM_LIBS m c resolv) elseif(WIN32) - set(PLATFORM_LIBS ws2_32 userenv advapi32) + set(PLATFORM_LIBS ntdll ncrypt secur32 ws2_32 userenv 
bcrypt msvcrt advapi32) else() message(STATUS "UNKNOWN OS") endif() @@ -52,27 +52,53 @@ if("${OS_NAME}" STREQUAL "linux") set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") endif() elseif("${OS_NAME}" STREQUAL "osx") - # TODO: clean up upstream; we are not correctly setting OS_ARCH for cross compile if ("${OSX_BUILD_ARCH}" STREQUAL "arm64") set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64") set(RUST_PLATFORM_TARGET "x86_64-apple-darwin") elseif ("${OS_ARCH}" STREQUAL "arm64") set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") - else() - set(RUST_PLATFORM_TARGET "x86_64-apple-darwin") + endif() +elseif(WIN32) + if (MINGW AND "${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-pc-windows-gnu") + elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64") + set(RUST_PLATFORM_TARGET "x86_64-pc-windows-gnu") + elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-pc-windows-msvc") + elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64") + set(RUST_PLATFORM_TARGET "x86_64-pc-windows-msvc") endif() endif() +# We currently only support the predefined targets. 
+if ("${RUST_PLATFORM_TARGET}" STREQUAL "") + message(FATAL_ERROR "Failed to detect the correct platform") +endif() + +set(RUST_PLATFORM_PARAM "--target=${RUST_PLATFORM_TARGET}") +message(STATUS "Building for rust target: ${RUST_PLATFORM_TARGET}") + +# Remove whitespaces before and after to prevent messed up env variables string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS) # Having these set will mess up cross compilation to linux arm set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD) +# Define all the relevant delta-kernel-rs paths/names +set(DELTA_KERNEL_LIBNAME "${CMAKE_STATIC_LIBRARY_PREFIX}delta_kernel_ffi${CMAKE_STATIC_LIBRARY_SUFFIX}") +set(DELTA_KERNEL_LIBPATH_DEBUG "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/${DELTA_KERNEL_LIBNAME}") +set(DELTA_KERNEL_LIBPATH_RELEASE "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/${DELTA_KERNEL_LIBNAME}") +set(DELTA_KERNEL_FFI_HEADER_PATH "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers") +set(DELTA_KERNEL_FFI_HEADER_C "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h") +set(DELTA_KERNEL_FFI_HEADER_CXX "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp") + # Add rust_example as a CMake target ExternalProject_Add( ${KERNEL_NAME} GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" + # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. 
Currently, when bumping + # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8 # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them # through CMake is an error-prone mess @@ -82,27 +108,28 @@ ExternalProject_Add( # Build debug build BUILD_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} - cargo build --package delta_kernel_ffi --workspace --all-features --target=${RUST_PLATFORM_TARGET} + cargo build --package delta_kernel_ffi --workspace --all-features ${RUST_PLATFORM_PARAM} # Build release build COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} - cargo build --package delta_kernel_ffi --workspace --all-features --release --target=${RUST_PLATFORM_TARGET} + cargo build --package delta_kernel_ffi --workspace --all-features --release ${RUST_PLATFORM_PARAM} # Build DATs COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml - BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a" - BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a" - BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h" - BUILD_BYPRODUCTS "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp" + # Define the byproducts, required for building with Ninja + BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_DEBUG}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_RELEASE}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_C}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_CXX}" INSTALL_COMMAND "" LOG_BUILD ON) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) 
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -include_directories(${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers) -include_directories(${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers) +# TODO: when C linkage issue is resolved, we should switch back to using the generated headers +#include_directories(${DELTA_KERNEL_FFI_HEADER_PATH}) # Hides annoying linker warnings set(CMAKE_OSX_DEPLOYMENT_TARGET 13.3 CACHE STRING "Minimum OS X deployment version" FORCE) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index b02e898..57d34d0 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -169,10 +169,10 @@ vector KernelUtils::FromDeltaBoolSlice(const struct ffi::KernelBoolSlice s return result; } -PredicateVisitor::PredicateVisitor(const vector &column_names, optional_ptr filters) : EnginePredicate { - .predicate = this, - .visitor = (uintptr_t (*)(void*, ffi::KernelExpressionVisitorState*)) &VisitPredicate} -{ +PredicateVisitor::PredicateVisitor(const vector &column_names, optional_ptr filters) { + predicate = this; + visitor = (uintptr_t (*)(void*, ffi::KernelExpressionVisitorState*)) &VisitPredicate; + if (filters) { for (auto& filter : filters->filters) { column_filters[column_names[filter.first]] = filter.second.get(); @@ -190,7 +190,7 @@ static auto GetNextFromCallable(Callable* callable) -> decltype(std::declval ffi::EngineIterator EngineIteratorFromCallable(Callable& callable) { auto* get_next = &GetNextFromCallable; - return {.data = &callable, .get_next = (const void *(*)(void*)) get_next}; + return {&callable, (const void *(*)(void*)) get_next}; }; // Helper function to prevent pushing down filters kernel cant handle diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp new file mode 100644 index 0000000..813d31e --- /dev/null +++ b/src/include/delta_kernel_ffi.hpp @@ -0,0 +1,537 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace ffi { + +enum 
class KernelError { + UnknownError, + FFIError, +#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) + ArrowError, +#endif + EngineDataTypeError, + ExtractError, + GenericError, + IOErrorError, +#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) + ParquetError, +#endif +#if defined(DEFINE_DEFAULT_ENGINE) + ObjectStoreError, +#endif +#if defined(DEFINE_DEFAULT_ENGINE) + ObjectStorePathError, +#endif +#if defined(DEFINE_DEFAULT_ENGINE) + Reqwest, +#endif + FileNotFoundError, + MissingColumnError, + UnexpectedColumnTypeError, + MissingDataError, + MissingVersionError, + DeletionVectorError, + InvalidUrlError, + MalformedJsonError, + MissingMetadataError, + MissingProtocolError, + MissingMetadataAndProtocolError, + ParseError, + JoinFailureError, + Utf8Error, + ParseIntError, + InvalidColumnMappingMode, + InvalidTableLocation, + InvalidDecimalError, +}; + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Struct to allow binding to the arrow [C Data +/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and +/// the schema. +struct ArrowFFIData; +#endif + +struct CStringMap; + +/// this struct can be used by an engine to materialize a selection vector +struct DvInfo; + +#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) +/// A builder that allows setting options on the `Engine` before actually building it +struct EngineBuilder; +#endif + +/// an opaque struct that encapsulates data read by an engine. this handle can be passed back into +/// some kernel calls to operate on the data, or can be converted into the raw data as read by the +/// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] +struct EngineData; + +struct KernelExpressionVisitorState; + +struct SharedExternEngine; + +struct SharedGlobalScanState; + +struct SharedScan; + +struct SharedScanDataIterator; + +struct SharedSnapshot; + +/// Represents an owned slice of boolean values allocated by the kernel. 
Any time the engine +/// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible +/// to free that slice, by calling [super::drop_bool_slice] exactly once. +struct KernelBoolSlice { + bool *ptr; + uintptr_t len; +}; + +/// An error that can be returned to the engine. Engines that wish to associate additional +/// information can define and use any type that is [pointer +/// interconvertible](https://en.cppreference.com/w/cpp/language/static_cast#pointer-interconvertible) +/// with this one -- e.g. by subclassing this struct or by embedding this struct as the first member +/// of a [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard-layout) +/// class. +struct EngineError { + KernelError etype; +}; + +/// Semantics: Kernel will always immediately return the leaked engine error to the engine (if it +/// allocated one at all), and engine is responsible for freeing it. +template +struct ExternResult { + enum class Tag { + Ok, + Err, + }; + + struct Ok_Body { + T _0; + }; + + struct Err_Body { + EngineError *_0; + }; + + Tag tag; + union { + Ok_Body ok; + Err_Body err; + }; +}; + +/// A non-owned slice of a UTF8 string, intended for arg-passing between kernel and engine. The +/// slice is only valid until the function it was passed into returns, and should not be copied. +/// +/// # Safety +/// +/// Intentionally not Copy, Clone, Send, nor Sync. +/// +/// Whoever instantiates the struct must ensure it does not outlive the data it points to. The +/// compiler cannot help us here, because raw pointers don't have lifetimes. To reduce the risk of +/// accidental misuse, it is recommended to only instantiate this struct as a function arg, by +/// converting a string slice `Into` a `KernelStringSlice`. That way, the borrowed reference at call +/// site protects the `KernelStringSlice` until the function returns. 
Meanwhile, the callee should +/// assume that the slice is only valid until the function returns, and must not retain any +/// references to the slice or its data that could outlive the function call. +/// +/// ``` +/// # use delta_kernel_ffi::KernelStringSlice; +/// fn wants_slice(slice: KernelStringSlice) { } +/// let msg = String::from("hello"); +/// wants_slice(msg.into()); +/// ``` +struct KernelStringSlice { + const char *ptr; + uintptr_t len; +}; + +using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg); + +/// Represents an object that crosses the FFI boundary and which outlives the scope that created +/// it. It can be passed freely between rust code and external code. +/// +/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type: +/// +/// * The true underlying ("target") type the handle represents. For safety reasons, target type +/// must always be [`Send`]. +/// +/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a +/// shared handle must always be [`Send`]+[`Sync`]. +/// +/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently. +/// +/// # Validity +/// +/// A `Handle` is _valid_ if all of the following hold: +/// +/// * It was created by a call to [`Handle::from`] +/// * Not yet dropped by a call to [`Handle::drop_handle`] +/// * Not yet consumed by a call to [`Handle::into_inner`] +/// +/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must +/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel +/// API call at a time. If thread races are possible, the handle should be protected with a +/// mutex.
Due to Rust [reference +/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), +/// this requirement applies even for API calls that appear to be read-only (because Rust code +/// always receives the handle as mutable). +/// +/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can +/// freely access shared (non-mutable) handles. +/// +template +using Handle = H*; + +/// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own +/// representation of a schema from a particular schema within kernel. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. +/// +/// Every schema element the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct, map, array) contains a list +/// of "child" elements. +/// 1. Before visiting schema or any complex type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any schema element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For the top-level schema, visit each top-level column, passing the column's name and type +/// - For a struct, first visit each struct field, passing the field's name, type, nullability, +/// and metadata +/// - For a map, visit the key and value, passing its special name ("map_key" or "map_value"), +/// type, and value nullability (keys are never nullable) +/// - For a list, visit the element, passing its special name ("array_element"), type, and +/// nullability +/// 3. 
When visiting a complex schema element, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_schema`] method returns the id of the list of top-level columns +struct EngineSchemaVisitor { + /// opaque state pointer + void *data; + /// Creates a new field list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. + void (*visit_struct)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + uintptr_t child_list_id); + /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list + /// with the array's element type + void (*visit_array)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + bool contains_null, + uintptr_t child_list_id); + /// Indicate that the schema contains a Map type. `child_list_id` will be a _two_ item list + /// where the first element is the map's key type and the second element is the + /// map's value type + void (*visit_map)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + bool value_contains_null, + uintptr_t child_list_id); + /// Visit a `decimal` with the specified `precision` and `scale` + void (*visit_decimal)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + uint8_t precision, + uint8_t scale); + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit an `integer` belonging to the list identified by `sibling_list_id`.
+ void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `short` belonging to the list identified by `sibling_list_id`. + void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `double` belonging to the list identified by `sibling_list_id`. + void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. + void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); +}; + +/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap +/// the engine functions. The engine retains ownership of the iterator. +struct EngineIterator { + void *data; + /// A function that should advance the iterator and return the next item from the data. + /// If the iterator is complete, it should return null.
It should be safe to + /// call `get_next()` multiple times if it returns null. + const void *(*get_next)(void *data); +}; + +/// A predicate that can be used to skip data when scanning. +/// +/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, +/// along with a visitor function that can be invoked to recursively visit the predicate. This +/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// kernel allocates visitor state, which becomes the second argument to the predicate visitor +/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the +/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and +/// kernel each retain ownership of their respective objects, with no need to coordinate memory +/// lifetimes with the other. +struct EnginePredicate { + void *predicate; + uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); +}; + +using NullableCvoid = void*; + +/// Allow engines to allocate strings of their own type. the contract of calling a passed allocate +/// function is that `kernel_str` is _only_ valid until the return from this function +using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); + +using CScanCallback = void(*)(NullableCvoid engine_context, + KernelStringSlice path, + int64_t size, + const DvInfo *dv_info, + const CStringMap *partition_map); + +// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 +struct im_an_unused_struct_that_tricks_msvc_into_compilation { + ExternResult field; + ExternResult field2; + ExternResult field3; + ExternResult> field4; + ExternResult> field5; + ExternResult field6; + ExternResult field7; + ExternResult> field8; + ExternResult> field9; + ExternResult> field10; +}; + + +extern "C" { + +/// # Safety +/// +/// Caller is responsible for passing a valid handle. 
+void drop_bool_slice(KernelBoolSlice slice); + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Get a "builder" that can be used to construct an engine. The function +/// [`set_builder_option`] can be used to set options on the builder prior to constructing the +/// actual engine +/// +/// # Safety +/// Caller is responsible for passing a valid path pointer. +ExternResult get_engine_builder(KernelStringSlice path, + AllocateErrorFn allocate_error); +#endif + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Set an option on the builder +/// +/// # Safety +/// +/// Caller must pass a valid EngineBuilder pointer, and valid slices for key and value +void set_builder_option(EngineBuilder *builder, KernelStringSlice key, KernelStringSlice value); +#endif + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Consume the builder and return an engine. After calling, the passed pointer is _no +/// longer valid_. +/// +/// # Safety +/// +/// Caller is responsible to pass a valid EngineBuilder pointer, and to not use it again afterwards +ExternResult> builder_build(EngineBuilder *builder); +#endif + +#if defined(DEFINE_DEFAULT_ENGINE) +/// # Safety +/// +/// Caller is responsible for passing a valid path pointer. +ExternResult> get_default_engine(KernelStringSlice path, + AllocateErrorFn allocate_error); +#endif + +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +void drop_engine(Handle engine); + +/// Get the latest snapshot from the specified table +/// +/// # Safety +/// +/// Caller is responsible for passing valid handles and path pointer. +ExternResult> snapshot(KernelStringSlice path, + Handle engine); + +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +void drop_snapshot(Handle snapshot); + +/// Get the version of the specified snapshot +/// +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +uint64_t version(Handle snapshot); + +/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. 
See the +/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. +/// +/// This method returns the id of the list allocated to hold the top level schema columns. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot handle and schema visitor. +uintptr_t visit_schema(Handle snapshot, EngineSchemaVisitor *visitor); + +uintptr_t visit_expression_and(KernelExpressionVisitorState *state, EngineIterator *children); + +uintptr_t visit_expression_lt(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b); + +uintptr_t visit_expression_le(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b); + +uintptr_t visit_expression_gt(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b); + +uintptr_t visit_expression_ge(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b); + +uintptr_t visit_expression_eq(KernelExpressionVisitorState *state, uintptr_t a, uintptr_t b); + +/// # Safety +/// The string slice must be valid +ExternResult visit_expression_column(KernelExpressionVisitorState *state, + KernelStringSlice name, + AllocateErrorFn allocate_error); + +/// # Safety +/// The string slice must be valid +ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, + KernelStringSlice value, + AllocateErrorFn allocate_error); + +uintptr_t visit_expression_literal_long(KernelExpressionVisitorState *state, int64_t value); + +/// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants +/// to use its own engine data format +/// +/// # Safety +/// +/// `data_handle` must be a valid pointer to a kernel allocated `EngineData`. The Engine must +/// ensure the handle outlives the returned pointer. +void *get_raw_engine_data(Handle data); + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data +/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). 
This includes the data and +/// the schema. +/// +/// # Safety +/// data_handle must be a valid EngineData as read by the +/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. +ExternResult get_raw_arrow_data(Handle data, + Handle engine); +#endif + +/// Drops a scan. +/// # Safety +/// Caller is responsible for passing a [valid][Handle#Validity] scan handle. +void drop_scan(Handle scan); + +/// Get a [`Scan`] over the table specified by the passed snapshot. +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot pointer, and engine pointer +ExternResult> scan(Handle snapshot, + Handle engine, + EnginePredicate *predicate); + +/// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`] +/// for more information. +/// +/// # Safety +/// Engine is responsible for providing a valid scan pointer +Handle get_global_scan_state(Handle scan); + +/// # Safety +/// +/// Caller is responsible for passing a valid global scan pointer. +void drop_global_scan_state(Handle state); + +/// Get an iterator over the data needed to perform a scan. This will return a +/// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual +/// data in the iterator. +/// +/// # Safety +/// +/// Engine is responsible for passing a valid [`SharedExternEngine`] and [`SharedScan`] +ExternResult> kernel_scan_data_init(Handle engine, + Handle scan); + +/// # Safety +/// +/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by +/// [kernel_scan_data_free]. The visitor function pointer must be non-null. +ExternResult kernel_scan_data_next(Handle data, + NullableCvoid engine_context, + void (*engine_visitor)(NullableCvoid engine_context, + Handle engine_data, + KernelBoolSlice selection_vector)); + +/// # Safety +/// +/// Caller is responsible for (at most once) passing a valid pointer returned by a call to +/// [`kernel_scan_data_init`]. 
+void kernel_scan_data_free(Handle data); + +/// allow probing into a CStringMap. If the specified key is in the map, kernel will call +/// allocate_fn with the value associated with the key and return the value returned from that +/// function. If the key is not in the map, this will return NULL +/// +/// # Safety +/// +/// The engine is responsible for providing a valid [`CStringMap`] pointer and [`KernelStringSlice`] +NullableCvoid get_from_map(const CStringMap *map, + KernelStringSlice key, + AllocateStringFn allocate_fn); + +/// Get a selection vector out of a [`DvInfo`] struct +/// +/// # Safety +/// Engine is responsible for providing valid pointers for each argument +ExternResult selection_vector_from_dv(const DvInfo *dv_info, + Handle engine, + Handle state); + +/// Shim for ffi to call visit_scan_data. This will generally be called when iterating through scan +/// data which provides the data handle and selection vector as each element in the iterator. +/// +/// # Safety +/// engine is responsbile for passing a valid [`EngineData`] and selection vector. 
+void visit_scan_data(Handle data, + KernelBoolSlice selection_vec, + NullableCvoid engine_context, + CScanCallback callback); + +} // extern "C" + + +} // namespace ffi From 6c9dc2163954cfa689fdd654533078352a684f11 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 19 Jun 2024 23:20:04 +0200 Subject: [PATCH 12/38] update inlined ffi header --- src/include/delta_kernel_ffi.hpp | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 813d31e..8c56f74 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -48,6 +48,7 @@ enum class KernelError { InvalidColumnMappingMode, InvalidTableLocation, InvalidDecimalError, + InvalidStructData, }; #if defined(DEFINE_DEFAULT_ENGINE) @@ -62,7 +63,7 @@ struct CStringMap; /// this struct can be used by an engine to materialize a selection vector struct DvInfo; -#if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) +#if defined(DEFINE_DEFAULT_ENGINE) /// A builder that allows setting options on the `Engine` before actually building it struct EngineBuilder; #endif @@ -352,9 +353,10 @@ void set_builder_option(EngineBuilder *builder, KernelStringSlice key, KernelStr #endif #if defined(DEFINE_DEFAULT_ENGINE) -/// Consume the builder and return an engine. After calling, the passed pointer is _no +/// Consume the builder and return a `default` engine. After calling, the passed pointer is _no /// longer valid_. /// +/// /// # Safety /// /// Caller is responsible to pass a valid EngineBuilder pointer, and to not use it again afterwards @@ -369,6 +371,13 @@ ExternResult> get_default_engine(KernelStringSlice pa AllocateErrorFn allocate_error); #endif +#if defined(DEFINE_SYNC_ENGINE) +/// # Safety +/// +/// Caller is responsible for passing a valid path pointer. 
+ExternResult> get_sync_engine(AllocateErrorFn allocate_error); +#endif + /// # Safety /// /// Caller is responsible for passing a valid handle. @@ -422,14 +431,30 @@ ExternResult visit_expression_column(KernelExpressionVisitorState *st KernelStringSlice name, AllocateErrorFn allocate_error); +uintptr_t visit_expression_not(KernelExpressionVisitorState *state, uintptr_t inner_expr); + +uintptr_t visit_expression_is_null(KernelExpressionVisitorState *state, uintptr_t inner_expr); + /// # Safety /// The string slice must be valid ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, KernelStringSlice value, AllocateErrorFn allocate_error); +uintptr_t visit_expression_literal_int(KernelExpressionVisitorState *state, int32_t value); + uintptr_t visit_expression_literal_long(KernelExpressionVisitorState *state, int64_t value); +uintptr_t visit_expression_literal_short(KernelExpressionVisitorState *state, int16_t value); + +uintptr_t visit_expression_literal_byte(KernelExpressionVisitorState *state, int8_t value); + +uintptr_t visit_expression_literal_float(KernelExpressionVisitorState *state, float value); + +uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, double value); + +uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value); + /// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants /// to use its own engine data format /// @@ -533,5 +558,4 @@ void visit_scan_data(Handle data, } // extern "C" - } // namespace ffi From 76f832b938e4cb5e8eec2a66772772c18ab087bc Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 19 Jun 2024 23:44:16 +0200 Subject: [PATCH 13/38] remove accidentally duplicated condition --- CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c3d259c..54b84b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,12 +48,6 @@ if("${OS_NAME}" STREQUAL "linux") 
set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib) set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include) set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") - elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") - set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu) - set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) - set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib) - set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include) - set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") else() set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") endif() From 7229c0ceb616e14688c70f9de1a7c1faa24fb5fd Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 20 Jun 2024 10:26:03 +0200 Subject: [PATCH 14/38] fix another small merge issue --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 54b84b4..390f876 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,15 +139,15 @@ add_compile_definitions(DEFINE_DEFAULT_ENGINE) # Link delta-kernal-rs to static lib target_link_libraries(${EXTENSION_NAME} - debug "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a" - optimized "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a" + debug ${DELTA_KERNEL_LIBPATH_DEBUG} + optimized ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) add_dependencies(${EXTENSION_NAME} delta_kernel) # Link delta-kernal-rs to dynamic lib target_link_libraries(${LOADABLE_EXTENSION_NAME} - debug 
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a" - optimized "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a" + debug ${DELTA_KERNEL_LIBPATH_DEBUG} + optimized ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel) From 5cb6f1dc87317b5c87bb9177c99a75c0c94df8ec Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 20 Jun 2024 16:29:03 +0200 Subject: [PATCH 15/38] bump kernel --- CMakeLists.txt | 4 ++-- src/include/delta_kernel_ffi.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 390f876..1f94ea9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,10 +96,10 @@ set(DELTA_KERNEL_FFI_HEADER_CXX "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/targe # Add rust_example as a CMake target ExternalProject_Add( ${KERNEL_NAME} - GIT_REPOSITORY "https://github.com/nicklan/delta-kernel-rs" + GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. 
Currently, when bumping # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix - GIT_TAG 181232a45562ca78be763c2f5fb46b88a2463b5c + GIT_TAG 6f95fd3bfaaa57698d72f539f8c6a0475a52c4e7 # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them # through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 8c56f74..12833be 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -28,7 +28,7 @@ enum class KernelError { ObjectStorePathError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - Reqwest, + ReqwestError, #endif FileNotFoundError, MissingColumnError, @@ -45,10 +45,10 @@ enum class KernelError { JoinFailureError, Utf8Error, ParseIntError, - InvalidColumnMappingMode, - InvalidTableLocation, + InvalidColumnMappingModeError, + InvalidTableLocationError, InvalidDecimalError, - InvalidStructData, + InvalidStructDataError, }; #if defined(DEFINE_DEFAULT_ENGINE) From c1f44a31c092ef5907f336dec0d9ef6ca3a983b9 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 20 Jun 2024 18:16:45 +0200 Subject: [PATCH 16/38] apply workaround for when partition values are NULL --- src/functions/delta_scan.cpp | 68 ++++++++++++++++++- test/sql/dat/all.test | 14 ++-- .../delta_kernel_rs/basic_partitioned.test | 12 ++-- 3 files changed, 82 insertions(+), 12 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index ed968a2..1065a7e 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -467,13 +467,79 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta return std::move(res); } +// This code is duplicated from MultiFileReader::CreateNameMapping the difference is that for columns that are not found +// in the parquet files, we just add null 
constant columns +static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, + const vector &local_names, const vector &global_types, + const vector &global_names, const vector &global_column_ids, + MultiFileReaderData &reader_data, const string &initial_file, + optional_ptr global_state) { + D_ASSERT(global_types.size() == global_names.size()); + D_ASSERT(local_types.size() == local_names.size()); + // we have expected types: create a map of name -> column index + case_insensitive_map_t name_map; + for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) { + name_map[local_names[col_idx]] = col_idx; + } + for (idx_t i = 0; i < global_column_ids.size(); i++) { + // check if this is a constant column + bool constant = false; + for (auto &entry : reader_data.constant_map) { + if (entry.column_id == i) { + constant = true; + break; + } + } + if (constant) { + // this column is constant for this file + continue; + } + // not constant - look up the column in the name map + auto global_id = global_column_ids[i]; + if (global_id >= global_types.size()) { + throw InternalException( + "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file"); + } + auto &global_name = global_names[global_id]; + auto entry = name_map.find(global_name); + if (entry == name_map.end()) { + string candidate_names; + for (auto &local_name : local_names) { + if (!candidate_names.empty()) { + candidate_names += ", "; + } + candidate_names += local_name; + } + // FIXME: this override is pretty hacky: for missing columns we just insert NULL constants + auto &global_type = global_types[global_id]; + Value val (global_type); + reader_data.constant_map.push_back({i, val}); + continue; + } + // we found the column in the local file - check if the types are the same + auto local_id = entry->second; + D_ASSERT(global_id < global_types.size()); + D_ASSERT(local_id < local_types.size()); + auto &global_type = 
global_types[global_id]; + auto &local_type = local_types[local_id]; + if (global_type != local_type) { + reader_data.cast_map[local_id] = global_type; + } + // the types are the same - create the mapping + reader_data.column_mapping.push_back(i); + reader_data.column_ids.push_back(local_id); + } + + reader_data.empty_columns = reader_data.column_ids.empty(); +} + void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) { // First call the base implementation to do most mapping - MultiFileReader::CreateNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, reader_data, initial_file, global_state); + CustomMulfiFileNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, reader_data, initial_file, global_state); // Then we handle delta specific mapping D_ASSERT(global_state); diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index 6afeb84..b3ba2d8 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -54,12 +54,6 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet') ---- - -### FAILING DAT TESTS - -# TODO fix all of these -mode skip - # basic_partitioned query I rowsort basic_partitioned SELECT * @@ -71,6 +65,14 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet') ---- +### FAILING DAT TESTS + +# TODO fix all of these +mode skip + +# Fetches path containing`letter=%252F%252520%2525f` from kernel +# Should be letter= %2F%2520%25f, which means its doubly url encoded + # multi_partitioned query I rowsort multi_partitioned SELECT * diff --git a/test/sql/delta_kernel_rs/basic_partitioned.test 
b/test/sql/delta_kernel_rs/basic_partitioned.test index 79804d1..d66d012 100644 --- a/test/sql/delta_kernel_rs/basic_partitioned.test +++ b/test/sql/delta_kernel_rs/basic_partitioned.test @@ -8,10 +8,12 @@ require delta require-env DELTA_KERNEL_TESTS_PATH -# FIXME: this fails due some weird error -mode skip - -statement error +query III SELECT * FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned') ---- -Failed to read file "/Users/sam/Development/delta-kernel-testing/delta-kernel-rs/kernel/tests/data/basic_partitioned/letter=__HIVE_DEFAULT_PARTITION__ +NULL 6 6.6 +a 4 4.4 +e 5 5.5 +a 1 1.1 +b 2 2.2 +c 3 3.3 From 90455e5f55f4caf682a81156c7ccdae2e47f7471 Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Tue, 25 Jun 2024 10:28:47 +0200 Subject: [PATCH 17/38] working azure setting --- .gitignore | 4 ++ extension_config.cmake | 3 + src/functions/delta_scan.cpp | 87 ++++++++++++++++++++--------- test/sql/generated/azure.emulator.x | 25 +++++++++ vcpkg.json | 3 + 5 files changed, 96 insertions(+), 26 deletions(-) create mode 100644 test/sql/generated/azure.emulator.x diff --git a/.gitignore b/.gitignore index 2cf38b5..31bc287 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ testext test/python/__pycache__/ .Rhistory data/generated +__azurite*__.json +__blobstorage__ +.venv +.vscode \ No newline at end of file diff --git a/extension_config.cmake b/extension_config.cmake index 46e7a27..6cfa12c 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -9,6 +9,9 @@ duckdb_extension_load(delta # Build the httpfs extension to test with s3/http duckdb_extension_load(httpfs) +# Build the azure extension to test with azure +duckdb_extension_load(azure) + # Build the tpch and tpcds extension for testing/benchmarking duckdb_extension_load(tpch) duckdb_extension_load(tpcds) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 05b958e..fb3643c 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -18,6 
+18,7 @@ #include #include +#include namespace duckdb { @@ -65,11 +66,23 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); } +std::string parseFromConnectionString(const std::string& connectionString, const std::string& key) { + std::regex pattern(key + "=([^;]+);"); + std::smatch matches; + if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) { + // The second match ([1]) contains the access key + return matches[1].str(); + } else { + // If no access key is found, return an empty string or handle as needed + return ""; + } +} + static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) { ffi::EngineBuilder* builder; // For "regular" paths we early out with the default builder config - if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfss://")) { + if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfs://") && !StringUtil::StartsWith(path, "abfss://")) { auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); } @@ -87,7 +100,7 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p bucket = path.substr(5, end_of_container-5); path_in_bucket = path.substr(end_of_container); secret_type = "s3"; - } else if (StringUtil::StartsWith(path, "azure://")) { + } else if ((StringUtil::StartsWith(path, "azure://")) || (StringUtil::StartsWith(path, "abfss://"))) { auto end_of_container = path.find('/',8); if(end_of_container == string::npos) { @@ -105,8 +118,8 @@ static ffi::EngineBuilder* 
CreateBuilder(ClientContext &context, const string &p bucket = path.substr(5, end_of_container-5); path_in_bucket = path.substr(end_of_container); secret_type = "azure"; - } else if (StringUtil::StartsWith(path, "abfss://")) { - auto end_of_container = path.find('/',8); + } else if (StringUtil::StartsWith(path, "abfs://")) { + auto end_of_container = path.find('/',7); if(end_of_container == string::npos) { throw IOException("Invalid azure url passed to delta scan: %s", path); @@ -157,10 +170,12 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p } else if (secret_type == "azure") { + // azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder + // and both require different settings + auto connection_string = kv_secret.TryGetValue("connection_string").ToString(); auto account_name = kv_secret.TryGetValue("account_name").ToString(); auto endpoint = kv_secret.TryGetValue("endpoint").ToString(); - auto credential_chain = kv_secret.TryGetValue("credential_chain").ToString(); auto client_id = kv_secret.TryGetValue("client_id").ToString(); auto client_secret = kv_secret.TryGetValue("client_secret").ToString(); auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString(); @@ -168,40 +183,60 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString(); auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString(); auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString(); + auto chain = kv_secret.TryGetValue("chain").ToString(); + + if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); //needed for delta RS builder + } - if (!connection_string.empty()) { - ffi::set_builder_option(builder, 
KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); + if (!connection_string.empty() && connection_string != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin + account_name = parseFromConnectionString(connection_string, "AccountName"); + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("access_key"), KernelUtils::ToDeltaString(parseFromConnectionString(connection_string, "AccountKey"))); //needed for delta RS builder } - if (!account_name.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_account_name"), KernelUtils::ToDeltaString(account_name)); + if (!account_name.empty() && account_name != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_account_name"), KernelUtils::ToDeltaString(account_name)); //needed for duckdb azure plugin + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder } - if (!endpoint.empty()) { + if (!endpoint.empty() && endpoint != "NULL") { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint)); + } else { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); //needed? Does that work with dfs files system? 
} - if (!credential_chain.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(credential_chain)); + if (!chain.empty() && chain != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("provider"), KernelUtils::ToDeltaString("credential_chain")); //needed for duckdb azure plugin + + if (chain.find("cli") != std::string::npos) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); //dont know if that is the right way, but we need to tell delta RS builder to authenticate with azure cli + } + + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary } - if (!client_id.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); + if (!client_id.empty() && client_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); //untested } - if (!client_secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); + if (!client_secret.empty() && client_secret != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); //untested } - if (!tenant_id.empty()) { - ffi::set_builder_option(builder, 
KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); + if (!tenant_id.empty() && tenant_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); //needed for duckdb azure plugin } - if (!certificate_path.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); + if (!certificate_path.empty() && certificate_path != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); //untested } - if (!http_proxy.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); + if (!http_proxy.empty() && http_proxy != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); //untested } - if (!proxy_user_name.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); + if (!proxy_user_name.empty() && proxy_user_name != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); //untested } - if (!proxy_password.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); + if (!proxy_password.empty() && proxy_password != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); //untested } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); // needed ? 
} return builder; diff --git a/test/sql/generated/azure.emulator.x b/test/sql/generated/azure.emulator.x new file mode 100644 index 0000000..86b67ec --- /dev/null +++ b/test/sql/generated/azure.emulator.x @@ -0,0 +1,25 @@ +# name: test/sql/generated/azure.emulator +# description: test delta scan on azure emulator data using secret +# group: [delta_generated] + +require parquet + +require httpfs + +require azure + +require delta + +require-env GENERATED_AZURE_DATA_AVAILABLE + +statement ok +CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1') + +# Run modified tpch q06 against the remote data +query I rowsort q1 +SELECT + * +FROM + delta_scan('az://test-bucket-ceiveran/delta_testing/lineitem_sf0_01/delta_lake/') +LIMIT 100 +---- \ No newline at end of file diff --git a/vcpkg.json b/vcpkg.json index 85936bf..0cefd94 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,8 @@ { "dependencies": [ + "azure-identity-cpp", + "azure-storage-blobs-cpp", + "azure-storage-files-datalake-cpp", "openssl" ] } \ No newline at end of file From 4688190ce50e0adadee097a46a28aee5b3cdd5cc Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Tue, 25 Jun 2024 14:03:17 +0200 Subject: [PATCH 18/38] load azure functionality from duckdb azure plugin --- .gitmodules | 3 +++ duckdb | 2 +- duckdb_azure | 1 + extension-ci-tools | 2 +- extension_config.cmake | 4 +++- 5 files changed, 9 insertions(+), 3 deletions(-) create mode 160000 duckdb_azure diff --git a/.gitmodules b/.gitmodules index a55d71e..5131848 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,3 +6,6 @@ path = extension-ci-tools url = git@github.com:duckdb/extension-ci-tools.git branch = main +[submodule "duckdb_azure"] 
+ path = duckdb_azure + url = https://github.com/duckdb/duckdb_azure.git diff --git a/duckdb b/duckdb index 1f98600..7b8efd3 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc +Subproject commit 7b8efd3d0fab38ec9dae467861a317af3f1d7f3e diff --git a/duckdb_azure b/duckdb_azure new file mode 160000 index 0000000..097bb13 --- /dev/null +++ b/duckdb_azure @@ -0,0 +1 @@ +Subproject commit 097bb13aadb186ca43ae9b5dc6a21c20e56ad4dd diff --git a/extension-ci-tools b/extension-ci-tools index c0cc931..71b8a60 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit c0cc9319492bfa38344c2f28bd35f2304c74cdde +Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f diff --git a/extension_config.cmake b/extension_config.cmake index 6cfa12c..369abd0 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -10,7 +10,9 @@ duckdb_extension_load(delta duckdb_extension_load(httpfs) # Build the azure extension to test with azure -duckdb_extension_load(azure) +duckdb_extension_load(azure + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/duckdb_azure +) # Build the tpch and tpcds extension for testing/benchmarking duckdb_extension_load(tpch) From 49810dac2fb9fbf8ef856a7b58e39c14981fb427 Mon Sep 17 00:00:00 2001 From: Norman Foerster Date: Tue, 25 Jun 2024 14:58:45 +0200 Subject: [PATCH 19/38] added tests --- .../{azure.emulator.x => azure.emulator.test} | 0 .../azure_emulator_with_partitions.test | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+) rename test/sql/generated/{azure.emulator.x => azure.emulator.test} (100%) create mode 100644 test/sql/generated/azure_emulator_with_partitions.test diff --git a/test/sql/generated/azure.emulator.x b/test/sql/generated/azure.emulator.test similarity index 100% rename from test/sql/generated/azure.emulator.x rename to test/sql/generated/azure.emulator.test diff --git a/test/sql/generated/azure_emulator_with_partitions.test 
b/test/sql/generated/azure_emulator_with_partitions.test new file mode 100644 index 0000000..78946b4 --- /dev/null +++ b/test/sql/generated/azure_emulator_with_partitions.test @@ -0,0 +1,25 @@ +# name: test/sql/generated/azure.emulator +# description: test delta scan on azure emulator data using secret +# group: [delta_generated] + +require parquet + +require httpfs + +require azure + +require delta + +require-env GENERATED_AZURE_DATA_AVAILABLE + +statement ok +CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1') + +# Run modified tpch q06 against the remote data +query I rowsort q1 +SELECT + * +FROM + delta_scan('az://test-bucket-ceiveran/delta_testing/simple_partitioned/delta_lake/') +where part=1 +---- \ No newline at end of file From aa0b52b14d459bece34efb305d39ca61a17c7ebc Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 12:10:00 +0200 Subject: [PATCH 20/38] add testing for azure --- .github/workflows/CloudTesting.yml | 80 ++++++++++++ .github/workflows/LocalTesting.yml | 66 ++++++++++ .gitmodules | 5 +- duckdb_azure | 1 - extension_config.cmake | 4 +- scripts/upload_test_files_to_azurite.sh | 21 ++++ src/functions/delta_scan.cpp | 114 +++++++++--------- test/sql/cloud/azure/cli_auth.test | 37 ++++++ .../cloud/azure/hierarchical_namespace.test | 42 +++++++ test/sql/cloud/azure/spn_auth.test | 38 ++++++ test/sql/cloud/azure/unauthenticated.test | 47 ++++++++ test/sql/cloud/azurite/azurite.test | 31 +++++ 12 files changed, 426 insertions(+), 60 deletions(-) create mode 100644 .github/workflows/CloudTesting.yml create mode 100644 .github/workflows/LocalTesting.yml delete mode 160000 duckdb_azure create mode 100755 
scripts/upload_test_files_to_azurite.sh create mode 100644 test/sql/cloud/azure/cli_auth.test create mode 100644 test/sql/cloud/azure/hierarchical_namespace.test create mode 100644 test/sql/cloud/azure/spn_auth.test create mode 100644 test/sql/cloud/azure/unauthenticated.test create mode 100644 test/sql/cloud/azurite/azurite.test diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml new file mode 100644 index 0000000..f75a37d --- /dev/null +++ b/.github/workflows/CloudTesting.yml @@ -0,0 +1,80 @@ +name: Cloud functional tests +on: [push, repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +jobs: + azure-tests-linux: + name: Azure tests (Linux) + runs-on: ubuntu-latest + env: + VCPKG_TARGET_TRIPLET: x64-linux + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + GEN: Ninja + DUCKDB_PLATFORM: linux_amd64 + + steps: + - name: Install required ubuntu packages + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq software-properties-common + sudo add-apt-repository ppa:git-core/ppa + sudo apt-get update -y -qq + sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases + with: + key: ${{ github.job }}-${{ matrix.duckdb_arch }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build extension + env: + GEN: ninja + run: | + make release + + - name: Test with Service 
Principal (SPN) in env vars + env: + AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} + AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} + AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + run: | + python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*" + + - name: Test with SPN logged in in azure-cli + env: + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + DUCKDB_AZ_CLI_LOGGED_IN: 1 + run: | + az login --service-principal -u ${{secrets.AZURE_CLIENT_ID}} -p ${{secrets.AZURE_CLIENT_SECRET}} --tenant ${{secrets.AZURE_TENANT_ID}} + python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*" + + - name: Log out azure-cli + if: always() + run: | + az logout + + - name: Tests that focus on public non-authenticated requests + env: + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE: 1 + run: | + python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*" \ No newline at end of file diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml new file mode 100644 index 0000000..23b31ac --- /dev/null +++ b/.github/workflows/LocalTesting.yml @@ -0,0 +1,66 @@ +name: Local functional tests +on: [push, pull_request,repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +jobs: + azurite-tests-linux: + name: Azurite (local azure test server) tests (Linux) + runs-on: ubuntu-latest + container: 'quay.io/pypa/manylinux2014_x86_64' + env: + VCPKG_TARGET_TRIPLET: 'x64-linux' + GEN: Ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + AZURE_STORAGE_CONNECTION_STRING: 
'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + AZURE_STORAGE_ACCOUNT: devstoreaccount1 + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: install Azure test service + run: | + yum install -y nodejs npm + npm install -g azurite + echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo + yum install -y azure-cli + + - name: Setup ManyLinux2014 + run: | + ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + # Build extension + - name: Build extension + env: + GEN: ninja + run: | + make release + + - name: Launch & populate Azure test service + run: | + azurite > azurite_log.txt 2>&1 & + sleep 10 + ./scripts/upload_test_files_to_azurite.sh + + - name: Test extension + run: | + make test + + - name: Azure test server log + if: always() + shell: bash + run: | + echo "## azurite" + cat azurite_log.txt \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 5131848..cd15846 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,7 +5,4 @@ [submodule "extension-ci-tools"] path = extension-ci-tools url = git@github.com:duckdb/extension-ci-tools.git - branch = main -[submodule "duckdb_azure"] - path = duckdb_azure - url = https://github.com/duckdb/duckdb_azure.git + branch = main \ No newline at end of file diff --git a/duckdb_azure b/duckdb_azure deleted file mode 160000 index 097bb13..0000000 --- a/duckdb_azure +++ /dev/null @@ 
-1 +0,0 @@ -Subproject commit 097bb13aadb186ca43ae9b5dc6a21c20e56ad4dd diff --git a/extension_config.cmake b/extension_config.cmake index 369abd0..16571c2 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -11,7 +11,9 @@ duckdb_extension_load(httpfs) # Build the azure extension to test with azure duckdb_extension_load(azure - SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/duckdb_azure + LOAD_TESTS + GIT_URL https://github.com/duckdb/duckdb_azure + GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e ) # Build the tpch and tpcds extension for testing/benchmarking diff --git a/scripts/upload_test_files_to_azurite.sh b/scripts/upload_test_files_to_azurite.sh new file mode 100755 index 0000000..f1ae34e --- /dev/null +++ b/scripts/upload_test_files_to_azurite.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Default Azurite connection string (see: https://github.com/Azure/Azurite) +conn_string="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;" + +# Create container +az storage container create -n delta-testing-private --connection-string "${conn_string}" +az storage container create -n delta-testing-public --connection-string "${conn_string}" --public-access blob + +copy_file() { + local from="${1}" + local to="${2}" + az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-private" --connection-string "${conn_string}" + az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-public" --connection-string "${conn_string}" +} + +cd ./build/debug/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated +while read filepath; do + remote_filepath=dat/"$(echo "${filepath}" | cut -c 3-)" + copy_file "${filepath}" "${remote_filepath}" +done < 
<(find . -type f) \ No newline at end of file diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 40dd143..3929c57 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -66,16 +66,25 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); } -std::string parseFromConnectionString(const std::string& connectionString, const std::string& key) { - std::regex pattern(key + "=([^;]+);"); +string ParseAccountNameFromEndpoint(const string& endpoint) { + if (!StringUtil::StartsWith(endpoint, "https://")) { + return ""; + } + auto result = endpoint.find('.', 8); + if (result == endpoint.npos) { + return ""; + } + return endpoint.substr(8,result-8); +} + +string parseFromConnectionString(const string& connectionString, const string& key) { + std::regex pattern(key + "=([^;]+)(?=;|$)"); std::smatch matches; if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) { // The second match ([1]) contains the access key return matches[1].str(); - } else { - // If no access key is found, return an empty string or handle as needed - return ""; } + return ""; } static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) { @@ -169,75 +178,72 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); } else if (secret_type == "azure") { - // azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder // and both require different settings - auto connection_string = kv_secret.TryGetValue("connection_string").ToString(); auto account_name = kv_secret.TryGetValue("account_name").ToString(); auto endpoint = kv_secret.TryGetValue("endpoint").ToString(); auto client_id = 
kv_secret.TryGetValue("client_id").ToString(); auto client_secret = kv_secret.TryGetValue("client_secret").ToString(); auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString(); - auto certificate_path = kv_secret.TryGetValue("certificate_path").ToString(); - auto http_proxy = kv_secret.TryGetValue("http_proxy").ToString(); - auto proxy_user_name = kv_secret.TryGetValue("proxy_user_name").ToString(); - auto proxy_password = kv_secret.TryGetValue("proxy_password").ToString(); auto chain = kv_secret.TryGetValue("chain").ToString(); + auto provider = kv_secret.GetProvider(); - if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); //needed for delta RS builder - } + if (provider == "credential_chain") { + // Authentication option 1a: using the cli authentication + if (chain.find("cli") != std::string::npos) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); + } + // Authentication option 1b: non-cli credential chains will just "hope for the best" technically since we are using the default + // credential chain provider duckDB and delta-kernel-rs should find the same auth + } else if (!connection_string.empty() && connection_string != "NULL") { - if (!connection_string.empty() && connection_string != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_storage_connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("connection_string"), KernelUtils::ToDeltaString(connection_string)); //needed for duckdb azure plugin + // Authentication option 2: a connection string based on account key + auto account_key = parseFromConnectionString(connection_string, "AccountKey"); account_name = 
parseFromConnectionString(connection_string, "AccountName"); - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("access_key"), KernelUtils::ToDeltaString(parseFromConnectionString(connection_string, "AccountKey"))); //needed for delta RS builder + // Authentication option 2: a connection string based on account key + if (!account_name.empty() && !account_key.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_key"), + KernelUtils::ToDeltaString(account_key)); + } else { + // Authentication option 2b: a connection string based on SAS token + endpoint = parseFromConnectionString(connection_string, "BlobEndpoint"); + if (account_name.empty()) { + account_name = ParseAccountNameFromEndpoint(endpoint); + } + auto sas_token = parseFromConnectionString(connection_string, "SharedAccessSignature"); + if (!sas_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("sas_token"), + KernelUtils::ToDeltaString(sas_token)); + } + } + } else if (provider == "service_principal") { + if (!client_id.empty() && client_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); + } + if (!client_secret.empty() && client_secret != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); + } + if (!tenant_id.empty() && tenant_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); + } + } else { + // Authentication option 3: no authentication, just an account name + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_skip_signature"), KernelUtils::ToDeltaString("true")); + } + // Set the use_emulator option for when the azurite test server is used + if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) { + 
ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); } if (!account_name.empty() && account_name != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_account_name"), KernelUtils::ToDeltaString(account_name)); //needed for duckdb azure plugin ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder } if (!endpoint.empty() && endpoint != "NULL") { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint)); } else { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); //needed? Does that work with dfs files system? - } - if (!chain.empty() && chain != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("provider"), KernelUtils::ToDeltaString("credential_chain")); //needed for duckdb azure plugin - - if (chain.find("cli") != std::string::npos) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); //dont know if that is the right way, but we need to tell delta RS builder to authenticate with azure cli - } - - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("credential_chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("chain"), KernelUtils::ToDeltaString(chain)); //needed for duckdb azure plugin, dont know if all three are necessary + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), 
KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); } - if (!client_id.empty() && client_id != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); //untested - } - if (!client_secret.empty() && client_secret != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); //untested - } - if (!tenant_id.empty() && tenant_id != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); //needed for duckdb azure plugin - } - if (!certificate_path.empty() && certificate_path != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_certificate_path"), KernelUtils::ToDeltaString(certificate_path)); //untested - } - if (!http_proxy.empty() && http_proxy != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("http_proxy"), KernelUtils::ToDeltaString(http_proxy)); //untested - } - if (!proxy_user_name.empty() && proxy_user_name != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_user_name"), KernelUtils::ToDeltaString(proxy_user_name)); //untested - } - if (!proxy_password.empty() && proxy_password != "NULL") { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("proxy_password"), KernelUtils::ToDeltaString(proxy_password)); //untested - } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); // needed ? 
- + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); } return builder; } diff --git a/test/sql/cloud/azure/cli_auth.test b/test/sql/cloud/azure/cli_auth.test new file mode 100644 index 0000000..fffa36a --- /dev/null +++ b/test/sql/cloud/azure/cli_auth.test @@ -0,0 +1,37 @@ +# name: test/sql/cloud/basic.test +# description: confirm queried data is correct +# group: [azure] + +require azure + +require parquet + +require delta + +require-env DUCKDB_AZ_CLI_LOGGED_IN + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +statement ok +CREATE SECRET az1 ( + TYPE AZURE, + PROVIDER CREDENTIAL_CHAIN, + CHAIN 'cli', + ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}' +) + +mode output_result + +# Run a remote DAT test +query I rowsort all_primitive_types +SELECT * +FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta') +---- + +query I rowsort all_primitive_types +SELECT * +FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet') +---- diff --git a/test/sql/cloud/azure/hierarchical_namespace.test b/test/sql/cloud/azure/hierarchical_namespace.test new file mode 100644 index 0000000..470a325 --- /dev/null +++ b/test/sql/cloud/azure/hierarchical_namespace.test @@ -0,0 +1,42 @@ +# name: test/sql/hierarchical_namespace.test +# description: test azure extension with ADLS GEN2 storage +# group: [azure] + +# Require statement will ensure this test is run with this extension loaded +require azure + +require parquet + +require delta + +require-env AZURE_TENANT_ID + +require-env AZURE_CLIENT_ID + +require-env AZURE_CLIENT_SECRET + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +statement ok +CREATE SECRET spn ( + TYPE AZURE, + PROVIDER SERVICE_PRINCIPAL, + TENANT_ID '${AZURE_TENANT_ID}', + CLIENT_ID '${AZURE_CLIENT_ID}', + CLIENT_SECRET '${AZURE_CLIENT_SECRET}', + ACCOUNT_NAME 
'${AZURE_STORAGE_ACCOUNT}' +); + +# Run a remote DAT test on abfss +query I +SELECT int32 +FROM delta_scan('abfss://delta-testing-private/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 diff --git a/test/sql/cloud/azure/spn_auth.test b/test/sql/cloud/azure/spn_auth.test new file mode 100644 index 0000000..11ed035 --- /dev/null +++ b/test/sql/cloud/azure/spn_auth.test @@ -0,0 +1,38 @@ +# name: test/sql/cloud/spn_auth.test +# description: test azure extension with service principal authentication +# group: [azure] + +require azure + +require parquet + +require delta + +require-env AZURE_CLIENT_ID + +require-env AZURE_CLIENT_SECRET + +require-env AZURE_TENANT_ID + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +CREATE SECRET spn ( + TYPE AZURE, + PROVIDER SERVICE_PRINCIPAL, + TENANT_ID '${AZURE_TENANT_ID}', + CLIENT_ID '${AZURE_CLIENT_ID}', + CLIENT_SECRET '${AZURE_CLIENT_SECRET}', + ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}' +); + +# Run a remote DAT test +query I rowsort all_primitive_types +SELECT * +FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta') +---- + +query I rowsort all_primitive_types +SELECT * +FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet') +---- diff --git a/test/sql/cloud/azure/unauthenticated.test b/test/sql/cloud/azure/unauthenticated.test new file mode 100644 index 0000000..84c1f5f --- /dev/null +++ b/test/sql/cloud/azure/unauthenticated.test @@ -0,0 +1,47 @@ +# name: test/sql/cloud/unauthenticated.test +# description: test unauthenticated queries +# group: [azure] + +require azure + +require parquet + +require delta + +require-env DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +# TODO: this doesn't work yet +mode skip + +query I +SELECT int32 +FROM delta_scan('azure://${AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +mode 
unskip + +# Using a secret to set the account name, we can omit the fully qualified url +statement ok +CREATE SECRET s1 (TYPE AZURE, ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}') + +query I +SELECT int32 +FROM delta_scan('azure://delta-testing-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + + diff --git a/test/sql/cloud/azurite/azurite.test b/test/sql/cloud/azurite/azurite.test new file mode 100644 index 0000000..169615b --- /dev/null +++ b/test/sql/cloud/azurite/azurite.test @@ -0,0 +1,31 @@ +# name: test/sql/cloud/azurite/azurite.test +# description: test with azurite test server +# group: [azure] + +# Require statement will ensure this test is run with this extension loaded +require azure + +require parquet + +require delta + +require-env AZURE_STORAGE_CONNECTION_STRING + +# Set connection string from env var +statement ok +CREATE SECRET (TYPE AZURE, CONNECTION_STRING '${AZURE_STORAGE_CONNECTION_STRING}'); + +# We need a connection string to do requests +foreach prefix azure:// az:// + +query I +SELECT int32 +FROM delta_scan('${prefix}delta-testing-private/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +endloop From 53e6d95751f95349af3efe9b08a169e9f7e09dfa Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 14:30:19 +0200 Subject: [PATCH 21/38] fix string encoding-related dat failures --- CMakeLists.txt | 2 +- src/functions/delta_scan.cpp | 23 ++++++++++++++++++++++- src/include/delta_kernel_ffi.hpp | 11 ++++++++++- test/sql/dat/all.test | 26 ++++++-------------------- 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f94ea9..6797b39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ ExternalProject_Add( GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. 
Currently, when bumping # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix - GIT_TAG 6f95fd3bfaaa57698d72f539f8c6a0475a52c4e7 + GIT_TAG ed2b80b127984481adba8e59879f39b9e5f871d1 # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them # through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 1065a7e..feae74c 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -25,12 +25,33 @@ static void* allocate_string(const struct ffi::KernelStringSlice slice) { return new string(slice.ptr, slice.len); } -static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) { +string url_decode(string input) { + string result; + result.reserve(input.size()); + char ch; + replace(input.begin(), input.end(), '+', ' '); + for (idx_t i = 0; i < input.length(); i++) { + if (int(input[i]) == 37) { + unsigned int ii; + sscanf(input.substr(i + 1, 2).c_str(), "%x", &ii); + ch = static_cast(ii); + result += ch; + i += 2; + } else { + result += input[i]; + } + } + return result; +} + +static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::Stats *, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) { auto context = (DeltaSnapshot *) engine_context; auto path_string = context->GetPath(); StringUtil::RTrim(path_string, "/"); path_string += "/" + KernelUtils::FromDeltaString(path); + path_string = url_decode(path_string); + // First we append the file to our resolved files context->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(path_string)); context->metadata.emplace_back(make_uniq()); 
diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 12833be..de22390 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -304,9 +304,19 @@ using NullableCvoid = void*; /// function is that `kernel_str` is _only_ valid until the return from this function using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); +/// Give engines an easy way to consume stats +struct Stats { + /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the + /// `num_records` statistic must be present and accurate, and must equal the number of records + /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat + /// outdated, i.e. not reflecting deleted rows yet. + uint64_t num_records; +}; + using CScanCallback = void(*)(NullableCvoid engine_context, KernelStringSlice path, int64_t size, + const Stats *stats, const DvInfo *dv_info, const CStringMap *partition_map); @@ -324,7 +334,6 @@ struct im_an_unused_struct_that_tricks_msvc_into_compilation { ExternResult> field10; }; - extern "C" { /// # Safety diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index b3ba2d8..676047b 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -65,14 +65,6 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet') ---- -### FAILING DAT TESTS - -# TODO fix all of these -mode skip - -# Fetches path containing`letter=%252F%252520%2525f` from kernel -# Should be letter= %2F%2520%25f, which means its doubly url encoded - # multi_partitioned query I rowsort multi_partitioned SELECT * @@ -80,18 +72,7 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta' ---- query I rowsort multi_partitioned -SELECT * -FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet') ----- - -# multi_partitioned -query I rowsort 
multi_partitioned -SELECT * -FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta') ----- - -query I rowsort multi_partitioned -SELECT * +SELECT letter, date, decode(data) as data, number FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet') ---- @@ -106,6 +87,11 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet') ---- +### FAILING DAT TESTS + +# TODO fix all of these +mode skip + # no_replay query I rowsort no_replay SELECT * From d33690e61901336f9e586008a1db7f3fdc1f6d6f Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 14:34:00 +0200 Subject: [PATCH 22/38] add rust to local test --- .github/workflows/LocalTesting.yml | 3 +++ test/sql/generated/azure.emulator.test | 25 ------------------- .../azure_emulator_with_partitions.test | 25 ------------------- 3 files changed, 3 insertions(+), 50 deletions(-) delete mode 100644 test/sql/generated/azure.emulator.test delete mode 100644 test/sql/generated/azure_emulator_with_partitions.test diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 23b31ac..a08f0c3 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -41,6 +41,9 @@ jobs: with: vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + # Build extension - name: Build extension env: diff --git a/test/sql/generated/azure.emulator.test b/test/sql/generated/azure.emulator.test deleted file mode 100644 index 86b67ec..0000000 --- a/test/sql/generated/azure.emulator.test +++ /dev/null @@ -1,25 +0,0 @@ -# name: test/sql/generated/azure.emulator -# description: test delta scan on azure emulator data using secret -# group: [delta_generated] - -require parquet - -require httpfs - -require azure - -require delta - -require-env GENERATED_AZURE_DATA_AVAILABLE - -statement ok 
-CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1') - -# Run modified tpch q06 against the remote data -query I rowsort q1 -SELECT - * -FROM - delta_scan('az://test-bucket-ceiveran/delta_testing/lineitem_sf0_01/delta_lake/') -LIMIT 100 ----- \ No newline at end of file diff --git a/test/sql/generated/azure_emulator_with_partitions.test b/test/sql/generated/azure_emulator_with_partitions.test deleted file mode 100644 index 78946b4..0000000 --- a/test/sql/generated/azure_emulator_with_partitions.test +++ /dev/null @@ -1,25 +0,0 @@ -# name: test/sql/generated/azure.emulator -# description: test delta scan on azure emulator data using secret -# group: [delta_generated] - -require parquet - -require httpfs - -require azure - -require delta - -require-env GENERATED_AZURE_DATA_AVAILABLE - -statement ok -CREATE SECRET azure_1 (TYPE AZURE, CONNECTION_STRING 'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;DefaultEndpointsProtocol=http;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1') - -# Run modified tpch q06 against the remote data -query I rowsort q1 -SELECT - * -FROM - delta_scan('az://test-bucket-ceiveran/delta_testing/simple_partitioned/delta_lake/') -where part=1 ----- \ No newline at end of file From 06798f3a9ad8369e8401fbc54dd8328a1addc3b0 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 14:42:32 +0200 Subject: [PATCH 23/38] small refactor --- .github/workflows/GeneratedTests.yml | 53 ---------------------------- 
.github/workflows/LocalTesting.yml | 51 +++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 57 deletions(-) delete mode 100644 .github/workflows/GeneratedTests.yml diff --git a/.github/workflows/GeneratedTests.yml b/.github/workflows/GeneratedTests.yml deleted file mode 100644 index bd106a5..0000000 --- a/.github/workflows/GeneratedTests.yml +++ /dev/null @@ -1,53 +0,0 @@ -# -# This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension -# -name: GeneratedTests -on: - push: - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -jobs: - generated-tests-linux: - name: Generated Tests (Linux) - runs-on: ubuntu-latest - env: - GEN: ninja - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Install - shell: bash - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - with: - key: ${{ github.job }} - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 - - - name: Build - shell: bash - run: make generate-data - - - name: Test - shell: bash - run: | - GENERATED_DATA_AVAILABLE=1 make test \ No newline at end of file diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index a08f0c3..95a7a09 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -36,14 +36,17 @@ jobs: run: | ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias + - name: Setup Rust for manylinux 
(dtolnay/rust-toolchain doesn't work due to curl being old here) + if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + - name: Setup vcpkg uses: lukka/run-vcpkg@v11.1 with: vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - # Build extension - name: Build extension env: @@ -66,4 +69,44 @@ jobs: shell: bash run: | echo "## azurite" - cat azurite_log.txt \ No newline at end of file + cat azurite_log.txt + + generated-tests-linux: + name: Generated Tests (Linux) + runs-on: ubuntu-latest + env: + GEN: ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Install + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Build + shell: bash + run: make generate-data + + - name: Test + shell: bash + run: | + GENERATED_DATA_AVAILABLE=1 make test \ No newline at end of file From 7c296837d93b8c21780d98ba287fe2a1e5361e7c Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 15:33:46 +0200 Subject: [PATCH 24/38] add missing openssl dep --- .github/workflows/LocalTesting.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 95a7a09..d74c7dd 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -34,7 +34,7 @@ jobs: - name: Setup ManyLinux2014 run: | - 
./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh openssl python_alias + ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} @@ -47,6 +47,17 @@ jobs: with: vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + - name: Handle OpenSSL dependency for rust build + run: | + echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV + echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV + echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV + + - name: Set Openssl dir + if: inputs.openssl_path != '' + shell: bash + run: | + # Build extension - name: Build extension env: From 20ad323e144e09d421bc35dd78fec04cdf293974 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 17:38:56 +0200 Subject: [PATCH 25/38] correct openssl path --- .github/workflows/LocalTesting.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index d74c7dd..0b2ea85 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -49,15 +49,10 @@ jobs: - name: Handle OpenSSL dependency for rust build run: | - echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV - echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV + echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV + echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV - - name: Set Openssl dir - if: inputs.openssl_path != '' - shell: bash - run: | - # Build extension - name: Build extension env: From 
9b0b86fd343fc59d3d96dfaa8f1689f4cddc8caa Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 27 Jun 2024 18:16:28 +0200 Subject: [PATCH 26/38] actually run rust install --- .github/workflows/LocalTesting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 0b2ea85..ecdc23c 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -37,7 +37,6 @@ jobs: ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) - if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> $GITHUB_PATH From d4454da51b22320bd7b0fa53539e46b95b5e39cf Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 28 Jun 2024 09:35:27 +0200 Subject: [PATCH 27/38] fix upload script --- scripts/upload_test_files_to_azurite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/upload_test_files_to_azurite.sh b/scripts/upload_test_files_to_azurite.sh index f1ae34e..f3631ba 100755 --- a/scripts/upload_test_files_to_azurite.sh +++ b/scripts/upload_test_files_to_azurite.sh @@ -14,7 +14,7 @@ copy_file() { az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-public" --connection-string "${conn_string}" } -cd ./build/debug/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated +cd ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated && while read filepath; do remote_filepath=dat/"$(echo "${filepath}" | cut -c 3-)" copy_file "${filepath}" "${remote_filepath}" From d23079edd39a1afee20e7e7e0c687337a479b79b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 31 May 2024 16:09:44 +0200 Subject: [PATCH 28/38] skip test case for windows for now --- src/delta_utils.cpp | 21 
++------------------- test/sql/dat/all.test | 3 +++ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 03f6562..f6f8b3d 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -218,20 +218,8 @@ static bool CanHandleFilter(TableFilter *filter) { } } -// Prunes the list of predicates to ones that we can handle -static unordered_map PrunePredicates(unordered_map predicates) { - unordered_map result; - for (const auto &predicate : predicates) { - if (CanHandleFilter(predicate.second)) { - result[predicate.first] = predicate.second; - } - - } - return result; -} - uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor* predicate, ffi::KernelExpressionVisitorState* state) { - auto filters = predicate->column_filters; + auto &filters = predicate->column_filters; auto it = filters.begin(); auto end = filters.end(); @@ -244,12 +232,7 @@ uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor* predicate, ffi::Ker }; auto eit = EngineIteratorFromCallable(get_next); - // TODO: this should be fixed upstream? - try { - return visit_expression_and(state, &eit); - } catch (...) 
{ - return ~0; - } + return visit_expression_and(state, &eit); } uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const ConstantFilter &filter, ffi::KernelExpressionVisitorState* state) { diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index 676047b..c25c646 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -76,6 +76,9 @@ SELECT letter, date, decode(data) as data, number FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet') ---- +# TODO: fix this +require notwindows + # multi_partitioned_2 query I rowsort multi_partitioned_2 SELECT * From 230dfa387e36870d32e42bd1e0bb72c454cdc7f5 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Tue, 2 Jul 2024 15:28:41 +0200 Subject: [PATCH 29/38] remove workflow override --- .../workflows/MainDistributionPipeline.yml | 3 +- .github/workflows/_extension_distribution.yml | 411 ------------------ 2 files changed, 2 insertions(+), 412 deletions(-) delete mode 100644 .github/workflows/_extension_distribution.yml diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index abdc095..c2644d4 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,10 +14,11 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: ./.github/workflows/_extension_distribution.yml # Overridden due to rust dependency during build + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.0.0 with: duckdb_version: v1.0.0 extension_name: delta + enable_rust: true exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' duckdb-stable-deploy: diff --git a/.github/workflows/_extension_distribution.yml b/.github/workflows/_extension_distribution.yml deleted file mode 100644 index a536982..0000000 --- a/.github/workflows/_extension_distribution.yml +++ /dev/null @@ -1,411 +0,0 @@ -# Reusable 
workflow for extension building - -name: Extension distribution -on: - workflow_call: - inputs: - # The name with which the extension will be built - extension_name: - required: true - type: string - # DuckDB version to build against, should in most cases be identical to - duckdb_version: - required: true - type: string - # ';' separated list of architectures to exclude, for example: 'linux_amd64;osx_arm64' - exclude_archs: - required: false - type: string - default: "" - # Postfix added to artifact names. Can be used to guarantee unique names when this workflow is called multiple times - artifact_postfix: - required: false - type: string - default: "" - # Override the default vcpkg commit used by this version of DuckDB - vcpkg_commit: - required: false - type: string - default: "a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6" - # Override the default script producing the matrices. Allows specifying custom matrices. - matrix_parse_script: - required: false - type: string - default: "./duckdb/scripts/modify_distribution_matrix.py" - # Enable building the DuckDB Shell - build_duckdb_shell: - required: false - type: boolean - default: true - -jobs: - generate_matrix: - name: Generate matrix - runs-on: ubuntu-latest - outputs: - linux_matrix: ${{ steps.set-matrix-linux.outputs.linux_matrix }} - windows_matrix: ${{ steps.set-matrix-windows.outputs.windows_matrix }} - osx_matrix: ${{ steps.set-matrix-osx.outputs.osx_matrix }} - wasm_matrix: ${{ steps.set-matrix-wasm.outputs.wasm_matrix }} - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - id: parse-matrices - run: | - python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os linux --output linux_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty - python3 ${{ inputs.matrix_parse_script }} --input 
./duckdb/.github/config/distribution_matrix.json --select_os osx --output osx_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty - python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os windows --output windows_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty - python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os wasm --output wasm_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty - - - id: set-matrix-linux - run: | - linux_matrix="`cat linux_matrix.json`" - echo linux_matrix=$linux_matrix >> $GITHUB_OUTPUT - echo `cat $GITHUB_OUTPUT` - - - id: set-matrix-osx - run: | - osx_matrix="`cat osx_matrix.json`" - echo osx_matrix=$osx_matrix >> $GITHUB_OUTPUT - echo `cat $GITHUB_OUTPUT` - - - id: set-matrix-windows - run: | - windows_matrix="`cat windows_matrix.json`" - echo windows_matrix=$windows_matrix >> $GITHUB_OUTPUT - echo `cat $GITHUB_OUTPUT` - - - id: set-matrix-wasm - run: | - wasm_matrix="`cat wasm_matrix.json`" - echo wasm_matrix=$wasm_matrix >> $GITHUB_OUTPUT - echo `cat $GITHUB_OUTPUT` - - linux: - name: Linux - runs-on: ubuntu-latest - container: ${{ matrix.container }} - needs: generate_matrix - if: ${{ needs.generate_matrix.outputs.linux_matrix != '{}' && needs.generate_matrix.outputs.linux_matrix != '' }} - strategy: - matrix: ${{fromJson(needs.generate_matrix.outputs.linux_matrix)}} - env: - VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - GEN: Ninja - BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }} - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - - steps: - - name: Install required ubuntu packages - if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }} - run: | - apt-get update -y -qq - apt-get install -y -qq software-properties-common - add-apt-repository ppa:git-core/ppa - 
apt-get update -y -qq - apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client - - - name: Install Git 2.18.5 - if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }} - run: | - wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz - tar xvf v2.18.5.tar.gz - cd git-2.18.5 - make - make prefix=/usr install - git --version - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - name: Setup ManyLinux2014 - if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} - run: | - ./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl - - - name: Setup Rust - if: ${{ matrix.duckdb_arch == 'linux_amd64'}} - uses: dtolnay/rust-toolchain@stable - - - name: Setup Rust for cross compilation - if: ${{ matrix.duckdb_arch == 'linux_arm64'}} - uses: dtolnay/rust-toolchain@stable - with: - targets: aarch64-unknown-linux-gnu - - - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) - if: ${{ matrix.duckdb_arch == 'linux_amd64_gcc4' }} - run: | - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - echo "$HOME/.cargo/bin" >> $GITHUB_PATH - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases - continue-on-error: true - with: - key: ${{ github.job }}-${{ matrix.duckdb_arch }} - - - name: Setup Ubuntu - if: ${{ matrix.duckdb_arch == 'linux_amd64' || matrix.duckdb_arch == 'linux_arm64' }} - uses: ./duckdb/.github/actions/ubuntu_18_setup - with: - aarch64_cross_compile: ${{ matrix.duckdb_arch == 'linux_arm64' && 1 }} - - - name: Setup 
vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: ${{ inputs.vcpkg_commit }} - - - name: Handle OpenSSL dependency for rust build - run: | - echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV - echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/${{ matrix.vcpkg_triplet }}" >> $GITHUB_ENV - echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV - - - name: Set Openssl dir - if: inputs.openssl_path != '' - shell: bash - run: | - - - name: Build extension - env: - GEN: ninja - CC: ${{ matrix.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-gcc' || '' }} - CXX: ${{ matrix.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-g++' || '' }} - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - run: | - make release - - - name: Test extension - if: ${{ matrix.duckdb_arch != 'linux_arm64'}} - run: | - make test - - - name: Error log - if: always() - run: | - echo "ERROR LOG" - cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log - - - - uses: actions/upload-artifact@v2 - with: - name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}} - path: | - build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension - - macos: - name: MacOS - runs-on: macos-latest - needs: generate_matrix - if: ${{ needs.generate_matrix.outputs.osx_matrix != '{}' && needs.generate_matrix.outputs.osx_matrix != '' }} - strategy: - matrix: ${{fromJson(needs.generate_matrix.outputs.osx_matrix)}} - env: - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} - OSX_BUILD_ARCH: ${{ matrix.osx_build_arch }} - GEN: Ninja - BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }} - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Install Ninja - run: | - 
brew install ninja - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - continue-on-error: true - with: - key: ${{ github.job }}-${{ matrix.duckdb_arch }} - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: ${{ inputs.vcpkg_commit }} - - - name: Install Rust cross compile dependency - if: ${{ matrix.osx_build_arch == 'x86_64'}} - run: | - rustup target add x86_64-apple-darwin - - - name: Build extension - shell: bash - env: - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - run: | - make release - - - name: Test Extension - if: ${{ matrix.osx_build_arch == 'arm64'}} - shell: bash - run: | - make test - - - name: Error log - if: always() - run: | - echo "ERROR LOG" - cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log - - - uses: actions/upload-artifact@v2 - with: - name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}} - path: | - build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension - - windows: - name: Windows - runs-on: windows-latest - needs: generate_matrix - if: ${{ needs.generate_matrix.outputs.windows_matrix != '{}' && needs.generate_matrix.outputs.windows_matrix != '' }} - strategy: - matrix: ${{fromJson(needs.generate_matrix.outputs.windows_matrix)}} - env: - GEN: Ninja - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} - BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }} - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - CC: ${{ matrix.duckdb_arch == 'windows_amd64_rtools' && 'gcc' || '' }} - CXX: ${{ matrix.duckdb_arch == 'windows_amd64_rtools' && 'g++' || '' }} - - steps: - - name: Keep \n line endings - shell: bash - 
run: | - git config --global core.autocrlf false - git config --global core.eol lf - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - - - uses: r-lib/actions/setup-r@v2 - if: matrix.duckdb_arch == 'windows_amd64_rtools' - with: - r-version: 'devel' - update-rtools: true - rtools-version: '42' # linker bug in 43 - - - name: Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - continue-on-error: true - with: - key: ${{ github.job }}-${{ matrix.duckdb_arch }} - - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: ${{ inputs.vcpkg_commit }} - - - name: Build & test extension - env: - VCPKG_OVERLAY_TRIPLETS: "${{ github.workspace }}/overlay_triplets" - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - run: | - make test_release - - - name: Error log - if: always() - run: | - cat build/release/rust/src/delta_kernel-stamp/delta_kernel-build-*.log - - - uses: actions/upload-artifact@v2 - with: - name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}} - path: | - build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension - - wasm: - name: DuckDB-Wasm - runs-on: ubuntu-latest - needs: generate_matrix - if: ${{ needs.generate_matrix.outputs.wasm_matrix != '{}' && needs.generate_matrix.outputs.wasm_matrix != '' }} - strategy: - matrix: ${{fromJson(needs.generate_matrix.outputs.wasm_matrix)}} - env: - VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - GEN: Ninja - DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }} - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: 
Checkout DuckDB to version - run: | - cd duckdb - git checkout ${{ inputs.duckdb_version }} - - - uses: mymindstorm/setup-emsdk@v13 - with: - version: 'latest' - - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: ${{ inputs.vcpkg_commit }} - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - continue-on-error: true - with: - key: ${{ github.job }}-${{ matrix.duckdb_arch }} - - - name: Build Wasm module - run: | - make ${{ matrix.duckdb_arch }} - - - uses: actions/upload-artifact@v3 - with: - name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}} - path: | - build/${{ matrix.duckdb_arch }}/${{ inputs.extension_name }}.duckdb_extension.wasm From a392ea517720dfe08276d74e32f7e9e78c7585b9 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 3 Jul 2024 11:24:28 +0200 Subject: [PATCH 30/38] Update README.md add newly supported platforms linux_arm64 and windows_amd64 --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 686c252..e10bd74 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ tables, both local and remote. 
# Supported platforms The supported platforms are: -- `linux_amd64` and `linux_amd64_gcc4` +- `linux_amd64` and `linux_amd64_gcc4` and `linux_arm64` - `osx_amd64` and `osx_arm64` +- `windows_amd64` Support for the [other](https://duckdb.org/docs/extensions/working_with_extensions#platforms) DuckDB platforms is work-in-progress @@ -68,4 +69,4 @@ To also run the tests on generated data: ```shell make generate-data GENERATED_DATA_AVAILABLE=1 make test -``` \ No newline at end of file +``` From 3e33b4967eac2cb4e6725eac52336fe0e319de59 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 12 Jul 2024 11:05:42 +0200 Subject: [PATCH 31/38] add support for minio, r2, gcs --- .github/workflows/LocalTesting.yml | 87 ++++++++++++++ extension-ci-tools | 2 +- extension_config.cmake | 7 ++ scripts/create_minio_credential_file.sh | 43 +++++++ scripts/upload_test_files_to_minio.sh | 4 + src/functions/delta_scan.cpp | 75 +++++++++++- test/sql/cloud/minio_local/gcs_r2.test | 93 +++++++++++++++ test/sql/cloud/minio_local/minio_local.test | 121 ++++++++++++++++++++ vcpkg.json | 7 +- 9 files changed, 432 insertions(+), 7 deletions(-) create mode 100755 scripts/create_minio_credential_file.sh create mode 100755 scripts/upload_test_files_to_minio.sh create mode 100644 test/sql/cloud/minio_local/gcs_r2.test create mode 100644 test/sql/cloud/minio_local/minio_local.test diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index ecdc23c..a424ebf 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -18,6 +18,7 @@ jobs: VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake AZURE_STORAGE_CONNECTION_STRING: 
'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' AZURE_STORAGE_ACCOUNT: devstoreaccount1 + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true steps: - uses: actions/checkout@v3 @@ -76,6 +77,92 @@ jobs: echo "## azurite" cat azurite_log.txt + minio-tests-linux: + name: Minio (local S3 test server) tests (Linux) + runs-on: ubuntu-latest + env: + S3_TEST_SERVER_AVAILABLE: 1 + GEN: ninja + VCPKG_TARGET_TRIPLET: x64-linux + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Checkout DuckDB to version + if: ${{ matrix.duckdb_version != ''}} + run: | + cd duckdb + git checkout ${{ matrix.duckdb_version }} + + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Ninja + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Build + shell: bash + run: make + + - name: Start S3/HTTP test server + shell: bash + run: | + cd duckdb + mkdir data/attach_test + touch data/attach_test/attach.db + sudo ./scripts/install_s3_test_server.sh + source ./scripts/run_s3_test_server.sh + sleep 30 + + - name: Write AWS credentials file + shell: bash + run: | + ./scripts/create_minio_credential_file.sh + + - name: Copy files to minio + shell: bash + env: + DUCKDB_MINIO_TEST_SERVER_AVAILABLE: 1 + AWS_ACCESS_KEY_ID: 
minio_duckdb_user + AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password + AWS_DEFAULT_REGION: eu-west-1 + AWS_ENDPOINT: duckdb-minio.com:9000 + run: | + ./scripts/upload_test_files_to_minio.sh + + - name: Test + shell: bash + run: | + make test + + - name: Run Env tests + shell: bash + env: + DUCKDB_MINIO_TEST_SERVER_AVAILABLE: 1 + AWS_ACCESS_KEY_ID: minio_duckdb_user + AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password + AWS_DEFAULT_REGION: eu-west-1 + AWS_ENDPOINT: duckdb-minio.com:9000 + run: | + ./build/release/test/unittest "*/test/sql/cloud/minio_local/*" + generated-tests-linux: name: Generated Tests (Linux) runs-on: ubuntu-latest diff --git a/extension-ci-tools b/extension-ci-tools index 71b8a60..d6d09ae 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f +Subproject commit d6d09ae94e71ae74d21f71bed5f9057accbb7505 diff --git a/extension_config.cmake b/extension_config.cmake index 16571c2..b2ba8c0 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -16,6 +16,13 @@ duckdb_extension_load(azure GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e ) +# Build the aws extension to test with credential providers +duckdb_extension_load(aws + LOAD_TESTS + GIT_URL https://github.com/duckdb/duckdb_aws + GIT_TAG 3d1f5c8d0127ff7aaf127935721b197e5fdd95e5 +) + # Build the tpch and tpcds extension for testing/benchmarking duckdb_extension_load(tpch) duckdb_extension_load(tpcds) diff --git a/scripts/create_minio_credential_file.sh b/scripts/create_minio_credential_file.sh new file mode 100755 index 0000000..c9f88d2 --- /dev/null +++ b/scripts/create_minio_credential_file.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Warning: overwrites your existing aws credentials file! 
+ +# Set the file path for the credentials file +credentials_file=~/.aws/credentials + +# Set the file path for the config file +config_file=~/.aws/config + +# Create the directory if it does not already exist +mkdir -p ~/.aws + +# Create the credentials configuration +credentials_str="[default] +aws_access_key_id=minio_duckdb_user +aws_secret_access_key=minio_duckdb_user_password + +[minio-testing-2] +aws_access_key_id=minio_duckdb_user_2 +aws_secret_access_key=minio_duckdb_user_2_password + +[minio-testing-invalid] +aws_access_key_id=minio_duckdb_user_invalid +aws_secret_access_key=thispasswordiscompletelywrong +aws_session_token=completelybogussessiontoken +" + +# Write the credentials configuration to the file +echo "$credentials_str" > "$credentials_file" + +# Create the config file contents (region per profile) +config_str="[default] +region=eu-west-1 + +[profile minio-testing-2] +region=eu-west-1 + +[profile minio-testing-invalid] +region=the-moon-123 +" + +# Write the config to the file +echo "$config_str" > "$config_file" \ No newline at end of file diff --git a/scripts/upload_test_files_to_minio.sh b/scripts/upload_test_files_to_minio.sh new file mode 100755 index 0000000..c5723c7 --- /dev/null +++ b/scripts/upload_test_files_to_minio.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +aws s3 cp --endpoint-url http://duckdb-minio.com:9000 --recursive ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated "s3://test-bucket/dat" +aws s3 cp --endpoint-url http://duckdb-minio.com:9000 --recursive ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated "s3://test-bucket-public/dat" \ No newline at end of file diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 5dae760..5d50c8b 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -112,7 +112,14 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p ffi::EngineBuilder* builder; // For "regular" paths we early out with
the default builder config - if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfs://") && !StringUtil::StartsWith(path, "abfss://")) { + if (!StringUtil::StartsWith(path, "s3://") && + !StringUtil::StartsWith(path, "gcs://") && + !StringUtil::StartsWith(path, "gs://") && + !StringUtil::StartsWith(path, "r2://") && + !StringUtil::StartsWith(path, "azure://") && + !StringUtil::StartsWith(path, "az://") && + !StringUtil::StartsWith(path, "abfs://") && + !StringUtil::StartsWith(path, "abfss://")) { auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); } @@ -130,6 +137,33 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p bucket = path.substr(5, end_of_container-5); path_in_bucket = path.substr(end_of_container); secret_type = "s3"; + } else if (StringUtil::StartsWith(path, "gcs://")) { + auto end_of_container = path.find('/',6); + + if(end_of_container == string::npos) { + throw IOException("Invalid gcs url passed to delta scan: %s", path); + } + bucket = path.substr(6, end_of_container-6); + path_in_bucket = path.substr(end_of_container); + secret_type = "gcs"; + } else if (StringUtil::StartsWith(path, "gs://")) { + auto end_of_container = path.find('/',5); + + if(end_of_container == string::npos) { + throw IOException("Invalid gcs url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + path_in_bucket = path.substr(end_of_container); + secret_type = "gcs"; + } else if (StringUtil::StartsWith(path, "r2://")) { + auto end_of_container = path.find('/',5); + + if(end_of_container == string::npos) { + throw IOException("Invalid gcs url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + 
path_in_bucket = path.substr(end_of_container); + secret_type = "r2"; } else if ((StringUtil::StartsWith(path, "azure://")) || (StringUtil::StartsWith(path, "abfss://"))) { auto end_of_container = path.find('/',8); @@ -159,8 +193,18 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p secret_type = "azure"; } - auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); - builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); + // We need to rewrite DuckDB's gcs://, gs:// and r2:// paths to s3:// because delta kernel should simply treat them as S3-protocol servers. + string cleaned_path; + if (StringUtil::StartsWith(path, "r2://") || StringUtil::StartsWith(path, "gs://") ) { + cleaned_path = "s3://" + path.substr(5); + } else if (StringUtil::StartsWith(path, "gcs://")) { + cleaned_path = "s3://" + path.substr(6); + } else { + cleaned_path = path; + } + + auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(cleaned_path), DuckDBEngineError::AllocateError); + builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + cleaned_path); // For S3 or Azure paths we need to trim the url, set the container, and fetch a potential secret auto &secret_manager = SecretManager::Get(context); @@ -170,18 +214,24 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p // No secret: nothing left to do here! if (!secret_match.HasMatch()) { + if (StringUtil::StartsWith(path, "r2://") || StringUtil::StartsWith(path, "gs://") || StringUtil::StartsWith(path, "gcs://")) { + throw NotImplementedException("Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently.
Please create an R2 or GCS secret containing the credentials for this endpoint and try again."); + } + return builder; } const auto &kv_secret = dynamic_cast(*secret_match.secret_entry->secret); - // Here you would need to add the logic for setting the builder options for Azure // This is just a placeholder and will need to be replaced with the actual logic - if (secret_type == "s3") { + if (secret_type == "s3" || secret_type == "gcs" || secret_type == "r2") { auto key_id = kv_secret.TryGetValue("key_id").ToString(); auto secret = kv_secret.TryGetValue("secret").ToString(); auto session_token = kv_secret.TryGetValue("session_token").ToString(); auto region = kv_secret.TryGetValue("region").ToString(); + auto endpoint = kv_secret.TryGetValue("endpoint").ToString(); + auto use_ssl = kv_secret.TryGetValue("use_ssl").ToString(); + auto url_style = kv_secret.TryGetValue("url_style").ToString(); if (key_id.empty() && secret.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true")); @@ -196,6 +246,21 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p if (!session_token.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token)); } + if (!endpoint.empty() && endpoint != "s3.amazonaws.com") { + if(!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) { + if(use_ssl == "1" || use_ssl == "NULL") { + endpoint = "https://" + endpoint; + } else { + endpoint = "http://" + endpoint; + } + } + + if (StringUtil::StartsWith(endpoint, "http://")) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), KernelUtils::ToDeltaString("true")); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), KernelUtils::ToDeltaString(endpoint)); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), 
KernelUtils::ToDeltaString(region)); } else if (secret_type == "azure") { diff --git a/test/sql/cloud/minio_local/gcs_r2.test b/test/sql/cloud/minio_local/gcs_r2.test new file mode 100644 index 0000000..319380c --- /dev/null +++ b/test/sql/cloud/minio_local/gcs_r2.test @@ -0,0 +1,93 @@ +# name: test/sql/cloud/minio_local/gcs_r2.test +# description: test delta extension with GCS and R2 +# group: [aws] + +require httpfs + +require parquet + +require delta + +require aws + +require-env DUCKDB_MINIO_TEST_SERVER_AVAILABLE + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env AWS_DEFAULT_REGION + +require-env AWS_ENDPOINT + +statement ok +set secret_directory='__TEST_DIR__/minio_local_gcs_env' + +statement error +FROM delta_scan('gcs://test-bucket/dat/all_primitive_types/delta') +---- +Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again. + +statement error +FROM delta_scan('gs://test-bucket/dat/all_primitive_types/delta') +---- +Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again. + +statement error +FROM delta_scan('r2://test-bucket/dat/all_primitive_types/delta') +---- +Can not scan a gcs:// gs:// or r2:// url without a secret providing its endpoint currently. Please create an R2 or GCS secret containing the credentials for this endpoint and try again. 
+ +# create a fake gcs secret +statement ok +CREATE SECRET ( + TYPE GCS, + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${AWS_ENDPOINT}', + USE_SSL false +) + +query I +SELECT int32 +FROM delta_scan('gcs://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +query I +SELECT int32 +FROM delta_scan('gs://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +# create a fake r2 secret +statement ok +CREATE SECRET s1 ( + TYPE R2, + PROVIDER config, + account_id 'some_bogus_account', + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${AWS_ENDPOINT}', + USE_SSL false +) + +query I +SELECT int32 +FROM delta_scan('r2://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 diff --git a/test/sql/cloud/minio_local/minio_local.test b/test/sql/cloud/minio_local/minio_local.test new file mode 100644 index 0000000..28031f7 --- /dev/null +++ b/test/sql/cloud/minio_local/minio_local.test @@ -0,0 +1,121 @@ +# name: test/sql/cloud/minio_local/minio_local.test +# description: test delta extension with a local minio installation +# group: [aws] + +require httpfs + +require parquet + +require delta + +require aws + +require-env DUCKDB_MINIO_TEST_SERVER_AVAILABLE + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env AWS_DEFAULT_REGION + +require-env AWS_ENDPOINT + +statement ok +set secret_directory='__TEST_DIR__/aws_secret_chains_env' + +# Secret with just the endpoint +statement ok +CREATE SECRET s1 ( + TYPE S3, + ENDPOINT '${AWS_ENDPOINT}', + USE_SSL false +); + +# We need auth for this +statement error +SELECT int32 +FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta') +---- +IO Error + +# unauthenticated query is fine!
+query I +SELECT int32 +FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +statement ok +DROP SECRET S1; + +# Now we create a config secret with credentials +statement ok +CREATE SECRET s1 ( + TYPE S3, + PROVIDER config, + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${AWS_ENDPOINT}', + USE_SSL false +); + +# Public bucket now does work +query I +SELECT int32 +FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +# Private bucket now does work too +query I +SELECT int32 +FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +statement ok +DROP SECRET S1; + +# Now we create a credential chain secret that searches the env vars automatically +statement ok +CREATE SECRET s1 ( + TYPE S3, + PROVIDER credential_chain, + ENDPOINT '${AWS_ENDPOINT}', + USE_SSL false +); + +# Still works! +query I +SELECT int32 +FROM delta_scan('s3://test-bucket-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +# Still works! 
+query I +SELECT int32 +FROM delta_scan('s3://test-bucket/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 diff --git a/vcpkg.json b/vcpkg.json index 0cefd94..8e8245d 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -3,6 +3,11 @@ "azure-identity-cpp", "azure-storage-blobs-cpp", "azure-storage-files-datalake-cpp", - "openssl" + "openssl", + "zlib", + { + "name": "aws-sdk-cpp", + "features": [ "sts" ] + } ] } \ No newline at end of file From e0add7b43b2d394e912acbf0ff01ff2a08a63bd8 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 15 Jul 2024 11:23:24 +0200 Subject: [PATCH 32/38] fix bug with count star and partition values --- src/functions/delta_scan.cpp | 3 +++ test/sql/dat/all.test | 41 +++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 5d50c8b..e07c391 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -557,6 +557,9 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio if (!file_metadata->partition_map.empty()) { for (idx_t i = 0; i < global_column_ids.size(); i++) { column_t col_id = global_column_ids[i]; + if (IsRowIdColumnId(col_id)) { + continue; + } auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]); if (col_partition_entry != file_metadata->partition_map.end()) { // Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index c25c646..f6332da 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -26,7 +26,6 @@ query I rowsort nested_types SELECT * FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/delta') ---- - query I rowsort nested_types SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/expected/latest/**/*.parquet') @@ -43,6 +42,16 @@ SELECT * FROM 
parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/latest/**/*.parquet') ---- +query I rowsort basic_append_count +SELECT count(*) +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/delta') +---- + +query I rowsort basic_append_count +SELECT count(*) +FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/latest/**/*.parquet') +---- + # with_schema_change query I rowsort with_checkpoint SELECT * @@ -54,6 +63,16 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet') ---- +query I rowsort with_checkpoint_count +SELECT count(*) +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta') +---- + +query I rowsort with_checkpoint_count +SELECT count(*) +FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet') +---- + # basic_partitioned query I rowsort basic_partitioned SELECT * @@ -65,6 +84,16 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet') ---- +query I rowsort basic_partitioned_count +SELECT count(*) +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/delta') +---- + +query I rowsort basic_partitioned_count +SELECT count(*) +FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_partitioned/expected/latest/**/*.parquet') +---- + # multi_partitioned query I rowsort multi_partitioned SELECT * @@ -76,6 +105,16 @@ SELECT letter, date, decode(data) as data, number FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet') ---- +query I rowsort multi_partitioned_count +SELECT count(*) +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/delta') +---- + +query I rowsort multi_partitioned_count +SELECT count(*) +FROM 
parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned/expected/latest/**/*.parquet') +---- + # TODO: fix this require notwindows From 4693e29550ef977bfed5b7593bd94b4e4b813b66 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 19 Jul 2024 11:17:30 +0200 Subject: [PATCH 33/38] bump kernel to v0.2.0 --- CMakeLists.txt | 2 +- src/functions/delta_scan.cpp | 2 +- src/include/delta_kernel_ffi.hpp | 278 +++++++++++++++++++++------ src/include/delta_utils.hpp | 10 +- src/include/functions/delta_scan.hpp | 2 +- test/sql/dat/all.test | 13 +- 6 files changed, 226 insertions(+), 81 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6797b39..4aeaefd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ ExternalProject_Add( GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix - GIT_TAG ed2b80b127984481adba8e59879f39b9e5f871d1 + GIT_TAG v0.2.0 # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them # through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index e07c391..d4a30fd 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -83,7 +83,7 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel context->metadata.back()->partition_map = std::move(constant_map); } - static void visit_data(void *engine_context, ffi::EngineData* engine_data, const struct ffi::KernelBoolSlice selection_vec) { + static void visit_data(void *engine_context, ffi::ExclusiveEngineData* engine_data, const struct ffi::KernelBoolSlice selection_vec) { 
ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); } diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index de22390..15db00d 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -49,15 +49,9 @@ enum class KernelError { InvalidTableLocationError, InvalidDecimalError, InvalidStructDataError, + InternalError, }; -#if defined(DEFINE_DEFAULT_ENGINE) -/// Struct to allow binding to the arrow [C Data -/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and -/// the schema. -struct ArrowFFIData; -#endif - struct CStringMap; /// this struct can be used by an engine to materialize a selection vector @@ -71,7 +65,9 @@ struct EngineBuilder; /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] -struct EngineData; +struct ExclusiveEngineData; + +struct ExclusiveFileReadResultIterator; struct KernelExpressionVisitorState; @@ -83,16 +79,55 @@ struct SharedScan; struct SharedScanDataIterator; +struct SharedSchema; + struct SharedSnapshot; +struct StringSliceIterator; + /// Represents an owned slice of boolean values allocated by the kernel. Any time the engine /// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible -/// to free that slice, by calling [super::drop_bool_slice] exactly once. +/// to free that slice, by calling [super::free_bool_slice] exactly once. struct KernelBoolSlice { bool *ptr; uintptr_t len; }; +/// Represents an object that crosses the FFI boundary and which outlives the scope that created +/// it. It can be passed freely between rust code and external code. 
The +/// +/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type: +/// +/// * The true underlying ("target") type the handle represents. For safety reasons, target type +/// must always be [`Send`]. +/// +/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a +/// shared handle must always be [`Send`]+[`Sync`]. +/// +/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently. +/// +/// # Validity +/// +/// A `Handle` is _valid_ if all of the following hold: +/// +/// * It was created by a call to [`Handle::from`] +/// * Not yet dropped by a call to [`Handle::drop_handle`] +/// * Not yet consumed by a call to [`Handle::into_inner`] +/// +/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must +/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel +/// API call at a time. If thread races are possible, the handle should be protected with a +/// mutex. Due to Rust [reference +/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), +/// this requirement applies even for API calls that appear to be read-only (because Rust code +/// always receives the handle as mutable). +/// +/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can +/// freely access shared (non-mutable) handles. +/// +template +using Handle = H*; + /// An error that can be returned to the engine. Engines that wish to associate additional /// information can define and use any type that is [pointer /// interconvertible](https://en.cppreference.com/w/cpp/language/static_cast#pointer-interconvertible) @@ -155,40 +190,11 @@ struct KernelStringSlice { using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg); -/// Represents an object that crosses the FFI boundary and which outlives the scope that created -/// it. 
It can be passed freely between rust code and external code. The -/// -/// An accompanying [`HandleDescriptor`] trait defines the behavior of each handle type: -/// -/// * The true underlying ("target") type the handle represents. For safety reasons, target type -/// must always be [`Send`]. -/// -/// * Mutable (`Box`-like) vs. shared (`Arc`-like). For safety reasons, the target type of a -/// shared handle must always be [`Send`]+[`Sync`]. -/// -/// * Sized vs. unsized. Sized types allow handle operations to be implemented more efficiently. -/// -/// # Validity -/// -/// A `Handle` is _valid_ if all of the following hold: -/// -/// * It was created by a call to [`Handle::from`] -/// * Not yet dropped by a call to [`Handle::drop_handle`] -/// * Not yet consumed by a call to [`Handle::into_inner`] -/// -/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must -/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel -/// API call at a time. If thread races are possible, the handle should be protected with a -/// mutex. Due to Rust [reference -/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), -/// this requirement applies even for API calls that appear to be read-only (because Rust code -/// always receives the handle as mutable). -/// -/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can -/// freely access shared (non-mutable) handles. -/// -template -using Handle = H*; +using NullableCvoid = void*; + +/// Allow engines to allocate strings of their own type. 
the contract of calling a passed allocate +/// function is that `kernel_str` is _only_ valid until the return from this function +using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. @@ -283,6 +289,68 @@ struct EngineIterator { const void *(*get_next)(void *data); }; +struct FileMeta { + KernelStringSlice path; + int64_t last_modified; + uintptr_t size; +}; + +/// ABI-compatible struct for ArrowArray from C Data Interface +/// See +/// +/// ``` +/// # use arrow_data::ArrayData; +/// # use arrow_data::ffi::FFI_ArrowArray; +/// fn export_array(array: &ArrayData) -> FFI_ArrowArray { +/// FFI_ArrowArray::new(array) +/// } +/// ``` +struct FFI_ArrowArray { + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void **buffers; + FFI_ArrowArray **children; + FFI_ArrowArray *dictionary; + void (*release)(FFI_ArrowArray *arg1); + void *private_data; +}; + +/// ABI-compatible struct for `ArrowSchema` from C Data Interface +/// See +/// +/// ``` +/// # use arrow_schema::DataType; +/// # use arrow_schema::ffi::FFI_ArrowSchema; +/// fn array_schema(data_type: &DataType) -> FFI_ArrowSchema { +/// FFI_ArrowSchema::try_from(data_type).unwrap() +/// } +/// ``` +/// +struct FFI_ArrowSchema { + const char *format; + const char *name; + const char *metadata; + int64_t flags; + int64_t n_children; + FFI_ArrowSchema **children; + FFI_ArrowSchema *dictionary; + void (*release)(FFI_ArrowSchema *arg1); + void *private_data; +}; + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Struct to allow binding to the arrow [C Data +/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and +/// the schema. 
+struct ArrowFFIData { + FFI_ArrowArray array; + FFI_ArrowSchema schema; +}; +#endif + /// A predicate that can be used to skip data when scanning. /// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, @@ -298,12 +366,6 @@ struct EnginePredicate { uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); }; -using NullableCvoid = void*; - -/// Allow engines to allocate strings of their own type. the contract of calling a passed allocate -/// function is that `kernel_str` is _only_ valid until the return from this function -using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); - /// Give engines an easy way to consume stats struct Stats { /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the @@ -339,7 +401,14 @@ extern "C" { /// # Safety /// /// Caller is responsible for passing a valid handle. -void drop_bool_slice(KernelBoolSlice slice); +void free_bool_slice(KernelBoolSlice slice); + +/// Drop an `ExclusiveEngineData`. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid handle as engine_data +void free_engine_data(Handle engine_data); #if defined(DEFINE_DEFAULT_ENGINE) /// Get a "builder" that can be used to construct an engine. The function @@ -390,7 +459,7 @@ ExternResult> get_sync_engine(AllocateErrorFn allocat /// # Safety /// /// Caller is responsible for passing a valid handle. -void drop_engine(Handle engine); +void free_engine(Handle engine); /// Get the latest snapshot from the specified table /// @@ -403,7 +472,7 @@ ExternResult> snapshot(KernelStringSlice path, /// # Safety /// /// Caller is responsible for passing a valid handle. -void drop_snapshot(Handle snapshot); +void free_snapshot(Handle snapshot); /// Get the version of the specified snapshot /// @@ -412,6 +481,27 @@ void drop_snapshot(Handle snapshot); /// Caller is responsible for passing a valid handle. 
uint64_t version(Handle snapshot); +/// Get the resolved root of the table. This should be used in any future calls that require +/// constructing a path +/// +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +NullableCvoid snapshot_table_root(Handle snapshot, AllocateStringFn allocate_fn); + +/// # Safety +/// +/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by +/// [kernel_scan_data_free]. The visitor function pointer must be non-null. +bool string_slice_next(Handle data, + NullableCvoid engine_context, + void (*engine_visitor)(NullableCvoid engine_context, KernelStringSlice slice)); + +/// # Safety +/// +/// Caller is responsible for (at most once) passing a valid pointer to a [`StringSliceIterator`] +void free_string_slice_data(Handle data); + /// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the /// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. /// @@ -464,14 +554,49 @@ uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, d uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value); -/// Allow an engine to "unwrap" an [`EngineData`] into the raw pointer for the case it wants +/// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The +/// _engine_ "owns" the data that is passed into the `engine_visitor`, since it is allocated by the +/// `Engine` being used for log-replay. If the engine wants the kernel to free this data, it _must_ +/// call [`free_engine_data`] on it. +/// +/// # Safety +/// +/// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by +/// [`free_read_result_iter`]. The visitor function pointer must be non-null. 
+ExternResult read_result_next(Handle data, + NullableCvoid engine_context, + void (*engine_visitor)(NullableCvoid engine_context, + Handle engine_data)); + +/// Free the memory from the passed read result iterator +/// # Safety +/// +/// Caller is responsible for (at most once) passing a valid pointer returned by a call to +/// [`read_parquet_file`]. +void free_read_result_iter(Handle data); + +/// Use the specified engine's [`delta_kernel::ParquetHandler`] to read the specified file. +/// +/// # Safety +/// Caller is responsible for calling with a valid `ExternEngineHandle` and `FileMeta` +ExternResult> read_parquet_file(Handle engine, + const FileMeta *file, + Handle physical_schema); + +/// Get the number of rows in an engine data +/// +/// # Safety +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData` +uintptr_t engine_data_length(Handle *data); + +/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants /// to use its own engine data format /// /// # Safety /// -/// `data_handle` must be a valid pointer to a kernel allocated `EngineData`. The Engine must +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must /// ensure the handle outlives the returned pointer. -void *get_raw_engine_data(Handle data); +void *get_raw_engine_data(Handle data); #if defined(DEFINE_DEFAULT_ENGINE) /// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data @@ -479,16 +604,16 @@ void *get_raw_engine_data(Handle data); /// the schema. /// /// # Safety -/// data_handle must be a valid EngineData as read by the +/// data_handle must be a valid ExclusiveEngineData as read by the /// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. -ExternResult get_raw_arrow_data(Handle data, +ExternResult get_raw_arrow_data(Handle data, Handle engine); #endif /// Drops a scan. 
/// # Safety /// Caller is responsible for passing a [valid][Handle#Validity] scan handle. -void drop_scan(Handle scan); +void free_scan(Handle scan); /// Get a [`Scan`] over the table specified by the passed snapshot. /// # Safety @@ -505,10 +630,35 @@ ExternResult> scan(Handle snapshot, /// Engine is responsible for providing a valid scan pointer Handle get_global_scan_state(Handle scan); +/// Get the kernel view of the physical read schema that an engine should read from parquet file in +/// a scan +/// /// # Safety +/// Engine is responsible for providing a valid GlobalScanState pointer +Handle get_global_read_schema(Handle state); + +/// Free a global read schema +/// +/// # Safety +/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`] +void free_global_read_schema(Handle schema); + +/// Get a count of the number of partition columns for this scan /// +/// # Safety +/// Caller is responsible for passing a valid global scan pointer. +uintptr_t get_partition_column_count(Handle state); + +/// Get an iterator of the list of partition columns for this scan. +/// +/// # Safety /// Caller is responsible for passing a valid global scan pointer. -void drop_global_scan_state(Handle state); +Handle get_partition_columns(Handle state); + +/// # Safety +/// +/// Caller is responsible for passing a valid global scan state pointer. +void free_global_scan_state(Handle state); /// Get an iterator over the data needed to perform a scan. 
This will return a /// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual @@ -523,18 +673,18 @@ ExternResult> kernel_scan_data_init(Handle kernel_scan_data_next(Handle data, NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, - Handle engine_data, + Handle engine_data, KernelBoolSlice selection_vector)); /// # Safety /// /// Caller is responsible for (at most once) passing a valid pointer returned by a call to /// [`kernel_scan_data_init`]. -void kernel_scan_data_free(Handle data); +void free_kernel_scan_data(Handle data); /// allow probing into a CStringMap. If the specified key is in the map, kernel will call /// allocate_fn with the value associated with the key and return the value returned from that @@ -559,8 +709,8 @@ ExternResult selection_vector_from_dv(const DvInfo *dv_info, /// data which provides the data handle and selection vector as each element in the iterator. /// /// # Safety -/// engine is responsbile for passing a valid [`EngineData`] and selection vector. -void visit_scan_data(Handle data, +/// engine is responsbile for passing a valid [`ExclusiveEngineData`] and selection vector. 
+void visit_scan_data(Handle data, KernelBoolSlice selection_vec, NullableCvoid engine_context, CScanCallback callback); diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 37dc289..9b33c5c 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -102,11 +102,11 @@ struct TemplatedUniqueKernelPointer : public UniqueKernelPointer { }; }; -typedef TemplatedUniqueKernelPointer KernelSnapshot; -typedef TemplatedUniqueKernelPointer KernelExternEngine; -typedef TemplatedUniqueKernelPointer KernelScan; -typedef TemplatedUniqueKernelPointer KernelGlobalScanState; -typedef TemplatedUniqueKernelPointer KernelScanDataIterator; +typedef TemplatedUniqueKernelPointer KernelSnapshot; +typedef TemplatedUniqueKernelPointer KernelExternEngine; +typedef TemplatedUniqueKernelPointer KernelScan; +typedef TemplatedUniqueKernelPointer KernelGlobalScanState; +typedef TemplatedUniqueKernelPointer KernelScanDataIterator; struct KernelUtils { static ffi::KernelStringSlice ToDeltaString(const string &str); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 07c782b..b4c3c76 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -22,7 +22,7 @@ struct DeltaFileMetaData { ~DeltaFileMetaData() { if (selection_vector.ptr) { - ffi::drop_bool_slice(selection_vector); + ffi::free_bool_slice(selection_vector); } } diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index f6332da..fc7c6c5 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -53,22 +53,22 @@ FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/basic_append/expected/ ---- # with_schema_change -query I rowsort with_checkpoint +query I rowsort with_schema_change SELECT * FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta') ---- -query I rowsort with_checkpoint +query I rowsort with_schema_change SELECT * FROM 
parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet') ---- -query I rowsort with_checkpoint_count +query I rowsort with_schema_change_count SELECT count(*) FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/delta') ---- -query I rowsort with_checkpoint_count +query I rowsort with_schema_change_count SELECT count(*) FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/with_schema_change/expected/latest/**/*.parquet') ---- @@ -129,11 +129,6 @@ SELECT * FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet') ---- -### FAILING DAT TESTS - -# TODO fix all of these -mode skip - # no_replay query I rowsort no_replay SELECT * From 7279b4fe01ec9473d12eb4add8682096c7c3c561 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 19 Jul 2024 15:12:58 +0200 Subject: [PATCH 34/38] add new return type to msvc workaround struct --- src/include/delta_kernel_ffi.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 15db00d..cf69a8d 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -393,7 +393,7 @@ struct im_an_unused_struct_that_tricks_msvc_into_compilation { ExternResult field7; ExternResult> field8; ExternResult> field9; - ExternResult> field10; + ExternResult> field10; }; extern "C" { From 34bb7fe1101ec5d7dc45c1a816665025dfbe40e7 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 24 Jul 2024 10:55:13 +0200 Subject: [PATCH 35/38] add basic benchmarking suite --- .gitignore | 2 + Makefile | 3 + benchmark/README.md | 27 ++++++++ benchmark/benchmark.Makefile | 65 +++++++++++++++++++ benchmark/tpcds/sf1-delta/load.sql | 24 +++++++ benchmark/tpcds/sf1-delta/q01.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q02.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q03.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q04.benchmark | 7 ++ 
benchmark/tpcds/sf1-delta/q05.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q06.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q07.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q08.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q09.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q10.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q11.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q12.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q13.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q14.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q15.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q16.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q17.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q18.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q19.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q20.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q21.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q22.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q23.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q24.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q25.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q26.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q27.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q28.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q29.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q30.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q31.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q32.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q33.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q34.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q35.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q36.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q37.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q38.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q39.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q40.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q41.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q42.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q43.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q44.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q45.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q46.benchmark | 7 ++ 
benchmark/tpcds/sf1-delta/q47.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q48.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q49.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q50.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q51.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q52.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q53.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q54.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q55.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q56.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q57.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q58.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q59.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q60.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q61.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q62.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q63.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q64.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q65.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q66.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q67.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q68.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q69.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q70.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q71.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q72.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q73.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q74.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q75.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q76.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q77.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q78.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q79.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q80.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q81.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q82.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q83.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q84.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q85.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q86.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q87.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q88.benchmark | 7 ++ 
benchmark/tpcds/sf1-delta/q89.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q90.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q91.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q92.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q93.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q94.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q95.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q96.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q97.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q98.benchmark | 7 ++ benchmark/tpcds/sf1-delta/q99.benchmark | 7 ++ .../tpcds/sf1-delta/tpcds_sf1.benchmark.in | 17 +++++ benchmark/tpcds/sf1-parquet/load.sql | 24 +++++++ benchmark/tpcds/sf1-parquet/q01.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q02.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q03.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q04.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q05.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q06.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q07.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q08.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q09.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q10.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q11.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q12.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q13.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q14.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q15.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q16.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q17.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q18.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q19.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q20.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q21.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q22.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q23.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q24.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q25.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q26.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q27.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q28.benchmark | 7 ++ 
benchmark/tpcds/sf1-parquet/q29.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q30.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q31.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q32.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q33.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q34.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q35.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q36.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q37.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q38.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q39.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q40.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q41.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q42.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q43.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q44.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q45.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q46.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q47.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q48.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q49.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q50.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q51.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q52.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q53.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q54.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q55.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q56.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q57.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q58.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q59.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q60.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q61.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q62.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q63.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q64.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q65.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q66.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q67.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q68.benchmark | 7 ++ 
benchmark/tpcds/sf1-parquet/q69.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q70.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q71.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q72.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q73.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q74.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q75.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q76.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q77.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q78.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q79.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q80.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q81.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q82.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q83.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q84.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q85.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q86.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q87.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q88.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q89.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q90.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q91.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q92.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q93.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q94.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q95.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q96.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q97.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q98.benchmark | 7 ++ benchmark/tpcds/sf1-parquet/q99.benchmark | 7 ++ .../tpcds/sf1-parquet/tpcds_sf1.benchmark.in | 17 +++++ benchmark/tpch/sf1-delta-remote/load.sql | 8 +++ benchmark/tpch/sf1-delta-remote/q01.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q02.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q03.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q04.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q05.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q06.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q07.benchmark | 7 ++ 
benchmark/tpch/sf1-delta-remote/q08.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q09.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q10.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q11.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q12.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q13.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q14.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q15.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q16.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q17.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q18.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q19.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q20.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q21.benchmark | 7 ++ benchmark/tpch/sf1-delta-remote/q22.benchmark | 7 ++ .../tpch_sf1_delta.benchmark.in | 19 ++++++ benchmark/tpch/sf1-delta/load.sql | 8 +++ benchmark/tpch/sf1-delta/q01.benchmark | 7 ++ benchmark/tpch/sf1-delta/q02.benchmark | 7 ++ benchmark/tpch/sf1-delta/q03.benchmark | 7 ++ benchmark/tpch/sf1-delta/q04.benchmark | 7 ++ benchmark/tpch/sf1-delta/q05.benchmark | 7 ++ benchmark/tpch/sf1-delta/q06.benchmark | 7 ++ benchmark/tpch/sf1-delta/q07.benchmark | 7 ++ benchmark/tpch/sf1-delta/q08.benchmark | 7 ++ benchmark/tpch/sf1-delta/q09.benchmark | 7 ++ benchmark/tpch/sf1-delta/q10.benchmark | 7 ++ benchmark/tpch/sf1-delta/q11.benchmark | 7 ++ benchmark/tpch/sf1-delta/q12.benchmark | 7 ++ benchmark/tpch/sf1-delta/q13.benchmark | 7 ++ benchmark/tpch/sf1-delta/q14.benchmark | 7 ++ benchmark/tpch/sf1-delta/q15.benchmark | 7 ++ benchmark/tpch/sf1-delta/q16.benchmark | 7 ++ benchmark/tpch/sf1-delta/q17.benchmark | 7 ++ benchmark/tpch/sf1-delta/q18.benchmark | 7 ++ benchmark/tpch/sf1-delta/q19.benchmark | 7 ++ benchmark/tpch/sf1-delta/q20.benchmark | 7 ++ benchmark/tpch/sf1-delta/q21.benchmark | 7 ++ benchmark/tpch/sf1-delta/q22.benchmark | 7 ++ .../sf1-delta/tpch_sf1_delta.benchmark.in | 17 +++++ benchmark/tpch/sf1-parquet-remote/load.sql | 8 +++ 
.../tpch/sf1-parquet-remote/q01.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q02.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q03.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q04.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q05.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q06.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q07.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q08.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q09.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q10.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q11.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q12.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q13.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q14.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q15.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q16.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q17.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q18.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q19.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q20.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q21.benchmark | 7 ++ .../tpch/sf1-parquet-remote/q22.benchmark | 7 ++ .../tpch_sf1_delta.benchmark.in | 19 ++++++ scripts/plot.py | 27 ++++++++ 279 files changed, 2133 insertions(+) create mode 100644 benchmark/README.md create mode 100644 benchmark/benchmark.Makefile create mode 100644 benchmark/tpcds/sf1-delta/load.sql create mode 100644 benchmark/tpcds/sf1-delta/q01.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q02.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q03.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q04.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q05.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q06.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q07.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q08.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q09.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q10.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q11.benchmark create mode 100644 
benchmark/tpcds/sf1-delta/q12.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q13.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q14.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q15.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q16.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q17.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q18.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q19.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q20.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q21.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q22.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q23.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q24.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q25.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q26.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q27.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q28.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q29.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q30.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q31.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q32.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q33.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q34.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q35.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q36.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q37.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q38.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q39.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q40.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q41.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q42.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q43.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q44.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q45.benchmark create mode 
100644 benchmark/tpcds/sf1-delta/q46.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q47.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q48.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q49.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q50.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q51.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q52.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q53.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q54.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q55.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q56.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q57.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q58.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q59.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q60.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q61.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q62.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q63.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q64.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q65.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q66.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q67.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q68.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q69.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q70.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q71.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q72.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q73.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q74.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q75.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q76.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q77.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q78.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q79.benchmark 
create mode 100644 benchmark/tpcds/sf1-delta/q80.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q81.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q82.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q83.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q84.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q85.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q86.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q87.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q88.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q89.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q90.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q91.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q92.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q93.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q94.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q95.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q96.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q97.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q98.benchmark create mode 100644 benchmark/tpcds/sf1-delta/q99.benchmark create mode 100644 benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in create mode 100644 benchmark/tpcds/sf1-parquet/load.sql create mode 100644 benchmark/tpcds/sf1-parquet/q01.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q02.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q03.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q04.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q05.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q06.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q07.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q08.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q09.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q10.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q11.benchmark create mode 100644 
benchmark/tpcds/sf1-parquet/q12.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q13.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q14.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q15.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q16.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q17.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q18.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q19.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q20.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q21.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q22.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q23.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q24.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q25.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q26.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q27.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q28.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q29.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q30.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q31.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q32.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q33.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q34.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q35.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q36.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q37.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q38.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q39.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q40.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q41.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q42.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q43.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q44.benchmark 
create mode 100644 benchmark/tpcds/sf1-parquet/q45.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q46.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q47.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q48.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q49.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q50.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q51.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q52.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q53.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q54.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q55.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q56.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q57.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q58.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q59.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q60.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q61.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q62.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q63.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q64.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q65.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q66.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q67.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q68.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q69.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q70.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q71.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q72.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q73.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q74.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q75.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q76.benchmark create mode 100644 
benchmark/tpcds/sf1-parquet/q77.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q78.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q79.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q80.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q81.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q82.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q83.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q84.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q85.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q86.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q87.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q88.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q89.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q90.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q91.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q92.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q93.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q94.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q95.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q96.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q97.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q98.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/q99.benchmark create mode 100644 benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in create mode 100644 benchmark/tpch/sf1-delta-remote/load.sql create mode 100644 benchmark/tpch/sf1-delta-remote/q01.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q02.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q03.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q04.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q05.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q06.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q07.benchmark create mode 100644 
benchmark/tpch/sf1-delta-remote/q08.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q09.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q10.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q11.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q12.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q13.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q14.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q15.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q16.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q17.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q18.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q19.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q20.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q21.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/q22.benchmark create mode 100644 benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in create mode 100644 benchmark/tpch/sf1-delta/load.sql create mode 100644 benchmark/tpch/sf1-delta/q01.benchmark create mode 100644 benchmark/tpch/sf1-delta/q02.benchmark create mode 100644 benchmark/tpch/sf1-delta/q03.benchmark create mode 100644 benchmark/tpch/sf1-delta/q04.benchmark create mode 100644 benchmark/tpch/sf1-delta/q05.benchmark create mode 100644 benchmark/tpch/sf1-delta/q06.benchmark create mode 100644 benchmark/tpch/sf1-delta/q07.benchmark create mode 100644 benchmark/tpch/sf1-delta/q08.benchmark create mode 100644 benchmark/tpch/sf1-delta/q09.benchmark create mode 100644 benchmark/tpch/sf1-delta/q10.benchmark create mode 100644 benchmark/tpch/sf1-delta/q11.benchmark create mode 100644 benchmark/tpch/sf1-delta/q12.benchmark create mode 100644 benchmark/tpch/sf1-delta/q13.benchmark create mode 100644 benchmark/tpch/sf1-delta/q14.benchmark create mode 100644 benchmark/tpch/sf1-delta/q15.benchmark create mode 100644 
benchmark/tpch/sf1-delta/q16.benchmark create mode 100644 benchmark/tpch/sf1-delta/q17.benchmark create mode 100644 benchmark/tpch/sf1-delta/q18.benchmark create mode 100644 benchmark/tpch/sf1-delta/q19.benchmark create mode 100644 benchmark/tpch/sf1-delta/q20.benchmark create mode 100644 benchmark/tpch/sf1-delta/q21.benchmark create mode 100644 benchmark/tpch/sf1-delta/q22.benchmark create mode 100644 benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in create mode 100644 benchmark/tpch/sf1-parquet-remote/load.sql create mode 100644 benchmark/tpch/sf1-parquet-remote/q01.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q02.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q03.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q04.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q05.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q06.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q07.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q08.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q09.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q10.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q11.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q12.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q13.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q14.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q15.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q16.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q17.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q18.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q19.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q20.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q21.benchmark create mode 100644 benchmark/tpch/sf1-parquet-remote/q22.benchmark 
create mode 100644 benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in create mode 100644 scripts/plot.py diff --git a/.gitignore b/.gitignore index 31bc287..bc1caa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ build +benchmark_results +duckdb_benchmark_data/ .idea cmake-build-debug duckdb_unittest_tempdir/ diff --git a/Makefile b/Makefile index 78144e6..defa0b6 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,9 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat +# Include the Makefile from the benchmark directory +include benchmark/benchmark.Makefile + # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..6ff3801 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,27 @@ +# Benchmarking the Delta Extension + +## Basics +A primitive benchmarking suite exists for the Delta extension. + +To run the benchmarks, first run the build using: +```shell +BUILD_BENCHMARK=1 make +``` + +Then, to run a benchmark, use one of the benchmark Makefile targets prefixed with `bench-run-`: +```shell +make bench-run-tpch-sf1 +``` +Now the TPCH benchmark will be run twice, once on parquet files and once on a delta table. + +To create a plot from the results, run: +```shell +make plot +``` + +## Configuration options +Specific benchmarks can be run from a suite using the `BENCHMARK_PATTERN` variable. 
For example, to compare +only Q01 from TPCH SF1, run: +```shell +BENCHMARK_PATTERN=q01.benchmark make bench-run-tpch-sf1 +``` \ No newline at end of file diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile new file mode 100644 index 0000000..3b88eb7 --- /dev/null +++ b/benchmark/benchmark.Makefile @@ -0,0 +1,65 @@ +# Set this flag during building to enable the benchmark runner +ifeq (${BUILD_BENCHMARK}, 1) + TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1 +endif + +ifeq ("${BENCHMARK_PATTERN}", "") + BENCHMARK_PATTERN:=.* +endif + +bench-output-dir: + mkdir -p benchmark_results + +clean_benchmark: + rm -rf benchmark_results + +plot: + python3 scripts/plot.py + + +############### BENCHMARK TARGETS ############### + +### +# TPCH LOCAL +### + +# TPCH SF1 on delta table +bench-run-tpch-sf1-delta: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-delta.csv +# TPCH SF1 on parquet files +bench-run-tpch-sf1-parquet: bench-output-dir + ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-parquet.csv +# COMPARES TPCH SF1 on parquet file vs on delta files +bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet + +### +# TPCH REMOTE +### + +# TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1) +bench-run-tpch-sf1-remote-delta: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-delta.csv +# TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1) +bench-run-tpch-sf1-remote-parquet: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-parquet.csv +# COMPARES TPCH SF1 on remote parquet files vs on remote delta table 
+bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta + +### +# TPCDS LOCAL +### + +# TPCDS SF1 on delta table +bench-run-tpcds-sf1-delta: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-delta.csv +# TPCDS SF1 on parquet files +bench-run-tpcds-sf1-parquet: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-parquet.csv +# COMPARES TPCDS SF1 on parquet file vs on delta files +bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet + +### +# ALL +### +bench-run-all-local: bench-run-tpcds-sf1 bench-run-tpch-sf1 + diff --git a/benchmark/tpcds/sf1-delta/load.sql b/benchmark/tpcds/sf1-delta/load.sql new file mode 100644 index 0000000..c482618 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/load.sql @@ -0,0 +1,24 @@ +create view call_center as from delta_scan('./data/generated/tpcds_sf1/call_center/delta_lake'); +create view catalog_page as from delta_scan('./data/generated/tpcds_sf1/catalog_page/delta_lake'); +create view catalog_returns as from delta_scan('./data/generated/tpcds_sf1/catalog_returns/delta_lake'); +create view catalog_sales as from delta_scan('./data/generated/tpcds_sf1/catalog_sales/delta_lake'); +create view customer as from delta_scan('./data/generated/tpcds_sf1/customer/delta_lake'); +create view customer_demographics as from delta_scan('./data/generated/tpcds_sf1/customer_demographics/delta_lake'); +create view customer_address as from delta_scan('./data/generated/tpcds_sf1/customer_address/delta_lake'); +create view date_dim as from delta_scan('./data/generated/tpcds_sf1/date_dim/delta_lake'); +create view household_demographics as from delta_scan('./data/generated/tpcds_sf1/household_demographics/delta_lake'); +create view inventory as from 
delta_scan('./data/generated/tpcds_sf1/inventory/delta_lake'); +create view income_band as from delta_scan('./data/generated/tpcds_sf1/income_band/delta_lake'); +create view item as from delta_scan('./data/generated/tpcds_sf1/item/delta_lake'); +create view promotion as from delta_scan('./data/generated/tpcds_sf1/promotion/delta_lake'); +create view reason as from delta_scan('./data/generated/tpcds_sf1/reason/delta_lake'); +create view ship_mode as from delta_scan('./data/generated/tpcds_sf1/ship_mode/delta_lake'); +create view store as from delta_scan('./data/generated/tpcds_sf1/store/delta_lake'); +create view store_returns as from delta_scan('./data/generated/tpcds_sf1/store_returns/delta_lake'); +create view store_sales as from delta_scan('./data/generated/tpcds_sf1/store_sales/delta_lake'); +create view time_dim as from delta_scan('./data/generated/tpcds_sf1/time_dim/delta_lake'); +create view warehouse as from delta_scan('./data/generated/tpcds_sf1/warehouse/delta_lake'); +create view web_page as from delta_scan('./data/generated/tpcds_sf1/web_page/delta_lake'); +create view web_returns as from delta_scan('./data/generated/tpcds_sf1/web_returns/delta_lake'); +create view web_sales as from delta_scan('./data/generated/tpcds_sf1/web_sales/delta_lake'); +create view web_site as from delta_scan('./data/generated/tpcds_sf1/web_site/delta_lake'); \ No newline at end of file diff --git a/benchmark/tpcds/sf1-delta/q01.benchmark b/benchmark/tpcds/sf1-delta/q01.benchmark new file mode 100644 index 0000000..4afa256 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q01.benchmark +# description: Run query 01 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpcds/sf1-delta/q02.benchmark b/benchmark/tpcds/sf1-delta/q02.benchmark new file mode 100644 index 0000000..260f65a --- /dev/null +++ 
b/benchmark/tpcds/sf1-delta/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q02.benchmark +# description: Run query 02 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpcds/sf1-delta/q03.benchmark b/benchmark/tpcds/sf1-delta/q03.benchmark new file mode 100644 index 0000000..06c4bf0 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q03.benchmark +# description: Run query 03 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpcds/sf1-delta/q04.benchmark b/benchmark/tpcds/sf1-delta/q04.benchmark new file mode 100644 index 0000000..ab13a2a --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q04.benchmark +# description: Run query 04 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpcds/sf1-delta/q05.benchmark b/benchmark/tpcds/sf1-delta/q05.benchmark new file mode 100644 index 0000000..583f4bc --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q05.benchmark +# description: Run query 05 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpcds/sf1-delta/q06.benchmark b/benchmark/tpcds/sf1-delta/q06.benchmark new file mode 100644 index 0000000..17a5d62 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q06.benchmark +# description: Run query 06 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff 
--git a/benchmark/tpcds/sf1-delta/q07.benchmark b/benchmark/tpcds/sf1-delta/q07.benchmark new file mode 100644 index 0000000..aa54538 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q07.benchmark +# description: Run query 07 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpcds/sf1-delta/q08.benchmark b/benchmark/tpcds/sf1-delta/q08.benchmark new file mode 100644 index 0000000..655ac7f --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q08.benchmark +# description: Run query 08 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpcds/sf1-delta/q09.benchmark b/benchmark/tpcds/sf1-delta/q09.benchmark new file mode 100644 index 0000000..c0bef34 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q09.benchmark +# description: Run query 09 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpcds/sf1-delta/q10.benchmark b/benchmark/tpcds/sf1-delta/q10.benchmark new file mode 100644 index 0000000..c99f19e --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q10.benchmark +# description: Run query 10 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpcds/sf1-delta/q11.benchmark b/benchmark/tpcds/sf1-delta/q11.benchmark new file mode 100644 index 0000000..bddc394 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q11.benchmark +# description: Run query 
11 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpcds/sf1-delta/q12.benchmark b/benchmark/tpcds/sf1-delta/q12.benchmark new file mode 100644 index 0000000..475d6c8 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q12.benchmark +# description: Run query 12 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpcds/sf1-delta/q13.benchmark b/benchmark/tpcds/sf1-delta/q13.benchmark new file mode 100644 index 0000000..a6915d0 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q13.benchmark +# description: Run query 13 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpcds/sf1-delta/q14.benchmark b/benchmark/tpcds/sf1-delta/q14.benchmark new file mode 100644 index 0000000..eec9b70 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q14.benchmark +# description: Run query 14 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpcds/sf1-delta/q15.benchmark b/benchmark/tpcds/sf1-delta/q15.benchmark new file mode 100644 index 0000000..9f44d26 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q15.benchmark +# description: Run query 15 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpcds/sf1-delta/q16.benchmark b/benchmark/tpcds/sf1-delta/q16.benchmark new file mode 100644 index 
0000000..b76f510 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q16.benchmark +# description: Run query 16 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpcds/sf1-delta/q17.benchmark b/benchmark/tpcds/sf1-delta/q17.benchmark new file mode 100644 index 0000000..46fa716 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q17.benchmark +# description: Run query 17 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpcds/sf1-delta/q18.benchmark b/benchmark/tpcds/sf1-delta/q18.benchmark new file mode 100644 index 0000000..e7cebaa --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q18.benchmark +# description: Run query 18 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpcds/sf1-delta/q19.benchmark b/benchmark/tpcds/sf1-delta/q19.benchmark new file mode 100644 index 0000000..88d96c6 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q19.benchmark +# description: Run query 19 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpcds/sf1-delta/q20.benchmark b/benchmark/tpcds/sf1-delta/q20.benchmark new file mode 100644 index 0000000..aa01979 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q20.benchmark +# description: Run query 20 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in 
+QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpcds/sf1-delta/q21.benchmark b/benchmark/tpcds/sf1-delta/q21.benchmark new file mode 100644 index 0000000..dbe5a17 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q21.benchmark +# description: Run query 21 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpcds/sf1-delta/q22.benchmark b/benchmark/tpcds/sf1-delta/q22.benchmark new file mode 100644 index 0000000..486db66 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q22.benchmark +# description: Run query 22 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpcds/sf1-delta/q23.benchmark b/benchmark/tpcds/sf1-delta/q23.benchmark new file mode 100644 index 0000000..695da9a --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q23.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q23.benchmark +# description: Run query 23 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=23 +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/tpcds/sf1-delta/q24.benchmark b/benchmark/tpcds/sf1-delta/q24.benchmark new file mode 100644 index 0000000..e12cffc --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q24.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q24.benchmark +# description: Run query 24 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=24 +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/tpcds/sf1-delta/q25.benchmark b/benchmark/tpcds/sf1-delta/q25.benchmark new file mode 100644 index 0000000..988426f --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q25.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/q25.benchmark +# description: Run query 25 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=25 +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/tpcds/sf1-delta/q26.benchmark b/benchmark/tpcds/sf1-delta/q26.benchmark new file mode 100644 index 0000000..a475e11 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q26.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q26.benchmark +# description: Run query 26 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=26 +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/tpcds/sf1-delta/q27.benchmark b/benchmark/tpcds/sf1-delta/q27.benchmark new file mode 100644 index 0000000..3474886 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q27.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q27.benchmark +# description: Run query 27 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=27 +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/tpcds/sf1-delta/q28.benchmark b/benchmark/tpcds/sf1-delta/q28.benchmark new file mode 100644 index 0000000..9dea5b9 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q28.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q28.benchmark +# description: Run query 28 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=28 +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/tpcds/sf1-delta/q29.benchmark b/benchmark/tpcds/sf1-delta/q29.benchmark new file mode 100644 index 0000000..22247ea --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q29.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q29.benchmark +# description: Run query 29 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=29 +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/tpcds/sf1-delta/q30.benchmark 
b/benchmark/tpcds/sf1-delta/q30.benchmark new file mode 100644 index 0000000..b41d4ad --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q30.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q30.benchmark +# description: Run query 30 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=30 +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/tpcds/sf1-delta/q31.benchmark b/benchmark/tpcds/sf1-delta/q31.benchmark new file mode 100644 index 0000000..92a0306 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q31.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q31.benchmark +# description: Run query 31 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=31 +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/tpcds/sf1-delta/q32.benchmark b/benchmark/tpcds/sf1-delta/q32.benchmark new file mode 100644 index 0000000..57ebee9 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q32.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q32.benchmark +# description: Run query 32 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=32 +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/tpcds/sf1-delta/q33.benchmark b/benchmark/tpcds/sf1-delta/q33.benchmark new file mode 100644 index 0000000..32c0479 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q33.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q33.benchmark +# description: Run query 33 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=33 +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/tpcds/sf1-delta/q34.benchmark b/benchmark/tpcds/sf1-delta/q34.benchmark new file mode 100644 index 0000000..0a034c4 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q34.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q34.benchmark +# description: Run query 34 from the TPC-DS benchmark +# group: [sf1] 
+ +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=34 +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/tpcds/sf1-delta/q35.benchmark b/benchmark/tpcds/sf1-delta/q35.benchmark new file mode 100644 index 0000000..7ee5bb2 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q35.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q35.benchmark +# description: Run query 35 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=35 +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/tpcds/sf1-delta/q36.benchmark b/benchmark/tpcds/sf1-delta/q36.benchmark new file mode 100644 index 0000000..f7b3fc0 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q36.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q36.benchmark +# description: Run query 36 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=36 +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/tpcds/sf1-delta/q37.benchmark b/benchmark/tpcds/sf1-delta/q37.benchmark new file mode 100644 index 0000000..e482a3d --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q37.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q37.benchmark +# description: Run query 37 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=37 +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/tpcds/sf1-delta/q38.benchmark b/benchmark/tpcds/sf1-delta/q38.benchmark new file mode 100644 index 0000000..120d4d1 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q38.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q38.benchmark +# description: Run query 38 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=38 +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/tpcds/sf1-delta/q39.benchmark b/benchmark/tpcds/sf1-delta/q39.benchmark new file mode 100644 index 0000000..9216a08 --- /dev/null +++ 
b/benchmark/tpcds/sf1-delta/q39.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q39.benchmark +# description: Run query 39 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=39 +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/tpcds/sf1-delta/q40.benchmark b/benchmark/tpcds/sf1-delta/q40.benchmark new file mode 100644 index 0000000..7b78045 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q40.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q40.benchmark +# description: Run query 40 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=40 +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/tpcds/sf1-delta/q41.benchmark b/benchmark/tpcds/sf1-delta/q41.benchmark new file mode 100644 index 0000000..03c041f --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q41.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q41.benchmark +# description: Run query 41 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=41 +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/tpcds/sf1-delta/q42.benchmark b/benchmark/tpcds/sf1-delta/q42.benchmark new file mode 100644 index 0000000..80eabea --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q42.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q42.benchmark +# description: Run query 42 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=42 +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/tpcds/sf1-delta/q43.benchmark b/benchmark/tpcds/sf1-delta/q43.benchmark new file mode 100644 index 0000000..2b0e27a --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q43.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q43.benchmark +# description: Run query 43 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=43 +QUERY_NUMBER_PADDED=43 
diff --git a/benchmark/tpcds/sf1-delta/q44.benchmark b/benchmark/tpcds/sf1-delta/q44.benchmark new file mode 100644 index 0000000..6e2571a --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q44.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q44.benchmark +# description: Run query 44 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=44 +QUERY_NUMBER_PADDED=44 diff --git a/benchmark/tpcds/sf1-delta/q45.benchmark b/benchmark/tpcds/sf1-delta/q45.benchmark new file mode 100644 index 0000000..bc733aa --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q45.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q45.benchmark +# description: Run query 45 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=45 +QUERY_NUMBER_PADDED=45 diff --git a/benchmark/tpcds/sf1-delta/q46.benchmark b/benchmark/tpcds/sf1-delta/q46.benchmark new file mode 100644 index 0000000..338d6c2 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q46.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q46.benchmark +# description: Run query 46 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=46 +QUERY_NUMBER_PADDED=46 diff --git a/benchmark/tpcds/sf1-delta/q47.benchmark b/benchmark/tpcds/sf1-delta/q47.benchmark new file mode 100644 index 0000000..0031c36 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q47.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q47.benchmark +# description: Run query 47 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=47 +QUERY_NUMBER_PADDED=47 diff --git a/benchmark/tpcds/sf1-delta/q48.benchmark b/benchmark/tpcds/sf1-delta/q48.benchmark new file mode 100644 index 0000000..dba4376 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q48.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q48.benchmark +# description: 
Run query 48 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=48 +QUERY_NUMBER_PADDED=48 diff --git a/benchmark/tpcds/sf1-delta/q49.benchmark b/benchmark/tpcds/sf1-delta/q49.benchmark new file mode 100644 index 0000000..e87d59d --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q49.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q49.benchmark +# description: Run query 49 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=49 +QUERY_NUMBER_PADDED=49 diff --git a/benchmark/tpcds/sf1-delta/q50.benchmark b/benchmark/tpcds/sf1-delta/q50.benchmark new file mode 100644 index 0000000..994ca01 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q50.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q50.benchmark +# description: Run query 50 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=50 +QUERY_NUMBER_PADDED=50 diff --git a/benchmark/tpcds/sf1-delta/q51.benchmark b/benchmark/tpcds/sf1-delta/q51.benchmark new file mode 100644 index 0000000..f684809 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q51.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q51.benchmark +# description: Run query 51 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=51 +QUERY_NUMBER_PADDED=51 diff --git a/benchmark/tpcds/sf1-delta/q52.benchmark b/benchmark/tpcds/sf1-delta/q52.benchmark new file mode 100644 index 0000000..d10ac68 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q52.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q52.benchmark +# description: Run query 52 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=52 +QUERY_NUMBER_PADDED=52 diff --git a/benchmark/tpcds/sf1-delta/q53.benchmark b/benchmark/tpcds/sf1-delta/q53.benchmark new file mode 100644 
index 0000000..209bb6c --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q53.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q53.benchmark +# description: Run query 53 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=53 +QUERY_NUMBER_PADDED=53 diff --git a/benchmark/tpcds/sf1-delta/q54.benchmark b/benchmark/tpcds/sf1-delta/q54.benchmark new file mode 100644 index 0000000..bac8abd --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q54.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q54.benchmark +# description: Run query 54 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=54 +QUERY_NUMBER_PADDED=54 diff --git a/benchmark/tpcds/sf1-delta/q55.benchmark b/benchmark/tpcds/sf1-delta/q55.benchmark new file mode 100644 index 0000000..cf9cfde --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q55.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q55.benchmark +# description: Run query 55 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=55 +QUERY_NUMBER_PADDED=55 diff --git a/benchmark/tpcds/sf1-delta/q56.benchmark b/benchmark/tpcds/sf1-delta/q56.benchmark new file mode 100644 index 0000000..3658451 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q56.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q56.benchmark +# description: Run query 56 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=56 +QUERY_NUMBER_PADDED=56 diff --git a/benchmark/tpcds/sf1-delta/q57.benchmark b/benchmark/tpcds/sf1-delta/q57.benchmark new file mode 100644 index 0000000..6730b17 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q57.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q57.benchmark +# description: Run query 57 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in 
+QUERY_NUMBER=57 +QUERY_NUMBER_PADDED=57 diff --git a/benchmark/tpcds/sf1-delta/q58.benchmark b/benchmark/tpcds/sf1-delta/q58.benchmark new file mode 100644 index 0000000..e462efb --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q58.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q58.benchmark +# description: Run query 58 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=58 +QUERY_NUMBER_PADDED=58 diff --git a/benchmark/tpcds/sf1-delta/q59.benchmark b/benchmark/tpcds/sf1-delta/q59.benchmark new file mode 100644 index 0000000..5281155 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q59.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q59.benchmark +# description: Run query 59 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=59 +QUERY_NUMBER_PADDED=59 diff --git a/benchmark/tpcds/sf1-delta/q60.benchmark b/benchmark/tpcds/sf1-delta/q60.benchmark new file mode 100644 index 0000000..71be7db --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q60.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q60.benchmark +# description: Run query 60 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=60 +QUERY_NUMBER_PADDED=60 diff --git a/benchmark/tpcds/sf1-delta/q61.benchmark b/benchmark/tpcds/sf1-delta/q61.benchmark new file mode 100644 index 0000000..95506eb --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q61.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q61.benchmark +# description: Run query 61 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=61 +QUERY_NUMBER_PADDED=61 diff --git a/benchmark/tpcds/sf1-delta/q62.benchmark b/benchmark/tpcds/sf1-delta/q62.benchmark new file mode 100644 index 0000000..349e4e9 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q62.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/q62.benchmark +# description: Run query 62 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=62 +QUERY_NUMBER_PADDED=62 diff --git a/benchmark/tpcds/sf1-delta/q63.benchmark b/benchmark/tpcds/sf1-delta/q63.benchmark new file mode 100644 index 0000000..6a03287 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q63.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q63.benchmark +# description: Run query 63 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=63 +QUERY_NUMBER_PADDED=63 diff --git a/benchmark/tpcds/sf1-delta/q64.benchmark b/benchmark/tpcds/sf1-delta/q64.benchmark new file mode 100644 index 0000000..3c3c968 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q64.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q64.benchmark +# description: Run query 64 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=64 +QUERY_NUMBER_PADDED=64 diff --git a/benchmark/tpcds/sf1-delta/q65.benchmark b/benchmark/tpcds/sf1-delta/q65.benchmark new file mode 100644 index 0000000..8e4d181 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q65.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q65.benchmark +# description: Run query 65 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=65 +QUERY_NUMBER_PADDED=65 diff --git a/benchmark/tpcds/sf1-delta/q66.benchmark b/benchmark/tpcds/sf1-delta/q66.benchmark new file mode 100644 index 0000000..c0130f2 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q66.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q66.benchmark +# description: Run query 66 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=66 +QUERY_NUMBER_PADDED=66 diff --git a/benchmark/tpcds/sf1-delta/q67.benchmark 
b/benchmark/tpcds/sf1-delta/q67.benchmark new file mode 100644 index 0000000..3aa7a26 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q67.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q67.benchmark +# description: Run query 67 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=67 +QUERY_NUMBER_PADDED=67 diff --git a/benchmark/tpcds/sf1-delta/q68.benchmark b/benchmark/tpcds/sf1-delta/q68.benchmark new file mode 100644 index 0000000..faf1c29 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q68.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q68.benchmark +# description: Run query 68 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=68 +QUERY_NUMBER_PADDED=68 diff --git a/benchmark/tpcds/sf1-delta/q69.benchmark b/benchmark/tpcds/sf1-delta/q69.benchmark new file mode 100644 index 0000000..bd36138 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q69.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q69.benchmark +# description: Run query 69 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=69 +QUERY_NUMBER_PADDED=69 diff --git a/benchmark/tpcds/sf1-delta/q70.benchmark b/benchmark/tpcds/sf1-delta/q70.benchmark new file mode 100644 index 0000000..f5a4e89 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q70.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q70.benchmark +# description: Run query 70 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=70 +QUERY_NUMBER_PADDED=70 diff --git a/benchmark/tpcds/sf1-delta/q71.benchmark b/benchmark/tpcds/sf1-delta/q71.benchmark new file mode 100644 index 0000000..9eab10a --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q71.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q71.benchmark +# description: Run query 71 from the TPC-DS benchmark +# group: [sf1] 
+ +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=71 +QUERY_NUMBER_PADDED=71 diff --git a/benchmark/tpcds/sf1-delta/q72.benchmark b/benchmark/tpcds/sf1-delta/q72.benchmark new file mode 100644 index 0000000..e07b4a0 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q72.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q72.benchmark +# description: Run query 72 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=72 +QUERY_NUMBER_PADDED=72 diff --git a/benchmark/tpcds/sf1-delta/q73.benchmark b/benchmark/tpcds/sf1-delta/q73.benchmark new file mode 100644 index 0000000..2a4205b --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q73.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q73.benchmark +# description: Run query 73 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=73 +QUERY_NUMBER_PADDED=73 diff --git a/benchmark/tpcds/sf1-delta/q74.benchmark b/benchmark/tpcds/sf1-delta/q74.benchmark new file mode 100644 index 0000000..6b51bb4 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q74.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q74.benchmark +# description: Run query 74 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=74 +QUERY_NUMBER_PADDED=74 diff --git a/benchmark/tpcds/sf1-delta/q75.benchmark b/benchmark/tpcds/sf1-delta/q75.benchmark new file mode 100644 index 0000000..1ec1156 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q75.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q75.benchmark +# description: Run query 75 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=75 +QUERY_NUMBER_PADDED=75 diff --git a/benchmark/tpcds/sf1-delta/q76.benchmark b/benchmark/tpcds/sf1-delta/q76.benchmark new file mode 100644 index 0000000..1c50fa8 --- /dev/null +++ 
b/benchmark/tpcds/sf1-delta/q76.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q76.benchmark +# description: Run query 76 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=76 +QUERY_NUMBER_PADDED=76 diff --git a/benchmark/tpcds/sf1-delta/q77.benchmark b/benchmark/tpcds/sf1-delta/q77.benchmark new file mode 100644 index 0000000..d4751c3 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q77.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q77.benchmark +# description: Run query 77 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=77 +QUERY_NUMBER_PADDED=77 diff --git a/benchmark/tpcds/sf1-delta/q78.benchmark b/benchmark/tpcds/sf1-delta/q78.benchmark new file mode 100644 index 0000000..e820401 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q78.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q78.benchmark +# description: Run query 78 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=78 +QUERY_NUMBER_PADDED=78 diff --git a/benchmark/tpcds/sf1-delta/q79.benchmark b/benchmark/tpcds/sf1-delta/q79.benchmark new file mode 100644 index 0000000..58b820f --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q79.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q79.benchmark +# description: Run query 79 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=79 +QUERY_NUMBER_PADDED=79 diff --git a/benchmark/tpcds/sf1-delta/q80.benchmark b/benchmark/tpcds/sf1-delta/q80.benchmark new file mode 100644 index 0000000..1815a97 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q80.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q80.benchmark +# description: Run query 80 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=80 +QUERY_NUMBER_PADDED=80 
diff --git a/benchmark/tpcds/sf1-delta/q81.benchmark b/benchmark/tpcds/sf1-delta/q81.benchmark new file mode 100644 index 0000000..9a07b52 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q81.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q81.benchmark +# description: Run query 81 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=81 +QUERY_NUMBER_PADDED=81 diff --git a/benchmark/tpcds/sf1-delta/q82.benchmark b/benchmark/tpcds/sf1-delta/q82.benchmark new file mode 100644 index 0000000..6c27933 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q82.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q82.benchmark +# description: Run query 82 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=82 +QUERY_NUMBER_PADDED=82 diff --git a/benchmark/tpcds/sf1-delta/q83.benchmark b/benchmark/tpcds/sf1-delta/q83.benchmark new file mode 100644 index 0000000..96dd1f3 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q83.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q83.benchmark +# description: Run query 83 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=83 +QUERY_NUMBER_PADDED=83 diff --git a/benchmark/tpcds/sf1-delta/q84.benchmark b/benchmark/tpcds/sf1-delta/q84.benchmark new file mode 100644 index 0000000..1c90d12 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q84.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q84.benchmark +# description: Run query 84 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=84 +QUERY_NUMBER_PADDED=84 diff --git a/benchmark/tpcds/sf1-delta/q85.benchmark b/benchmark/tpcds/sf1-delta/q85.benchmark new file mode 100644 index 0000000..9687a23 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q85.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q85.benchmark +# description: 
Run query 85 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=85 +QUERY_NUMBER_PADDED=85 diff --git a/benchmark/tpcds/sf1-delta/q86.benchmark b/benchmark/tpcds/sf1-delta/q86.benchmark new file mode 100644 index 0000000..b2c74c8 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q86.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q86.benchmark +# description: Run query 86 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=86 +QUERY_NUMBER_PADDED=86 diff --git a/benchmark/tpcds/sf1-delta/q87.benchmark b/benchmark/tpcds/sf1-delta/q87.benchmark new file mode 100644 index 0000000..95a55c5 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q87.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q87.benchmark +# description: Run query 87 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=87 +QUERY_NUMBER_PADDED=87 diff --git a/benchmark/tpcds/sf1-delta/q88.benchmark b/benchmark/tpcds/sf1-delta/q88.benchmark new file mode 100644 index 0000000..c6de497 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q88.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q88.benchmark +# description: Run query 88 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=88 +QUERY_NUMBER_PADDED=88 diff --git a/benchmark/tpcds/sf1-delta/q89.benchmark b/benchmark/tpcds/sf1-delta/q89.benchmark new file mode 100644 index 0000000..f7bb181 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q89.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q89.benchmark +# description: Run query 89 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=89 +QUERY_NUMBER_PADDED=89 diff --git a/benchmark/tpcds/sf1-delta/q90.benchmark b/benchmark/tpcds/sf1-delta/q90.benchmark new file mode 100644 
index 0000000..b641fd9 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q90.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q90.benchmark +# description: Run query 90 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=90 +QUERY_NUMBER_PADDED=90 diff --git a/benchmark/tpcds/sf1-delta/q91.benchmark b/benchmark/tpcds/sf1-delta/q91.benchmark new file mode 100644 index 0000000..7e960d5 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q91.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q91.benchmark +# description: Run query 91 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=91 +QUERY_NUMBER_PADDED=91 diff --git a/benchmark/tpcds/sf1-delta/q92.benchmark b/benchmark/tpcds/sf1-delta/q92.benchmark new file mode 100644 index 0000000..6486179 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q92.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q92.benchmark +# description: Run query 92 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=92 +QUERY_NUMBER_PADDED=92 diff --git a/benchmark/tpcds/sf1-delta/q93.benchmark b/benchmark/tpcds/sf1-delta/q93.benchmark new file mode 100644 index 0000000..e42fded --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q93.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q93.benchmark +# description: Run query 93 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=93 +QUERY_NUMBER_PADDED=93 diff --git a/benchmark/tpcds/sf1-delta/q94.benchmark b/benchmark/tpcds/sf1-delta/q94.benchmark new file mode 100644 index 0000000..13bab12 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q94.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q94.benchmark +# description: Run query 94 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in 
+QUERY_NUMBER=94 +QUERY_NUMBER_PADDED=94 diff --git a/benchmark/tpcds/sf1-delta/q95.benchmark b/benchmark/tpcds/sf1-delta/q95.benchmark new file mode 100644 index 0000000..a6cffc8 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q95.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q95.benchmark +# description: Run query 95 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=95 +QUERY_NUMBER_PADDED=95 diff --git a/benchmark/tpcds/sf1-delta/q96.benchmark b/benchmark/tpcds/sf1-delta/q96.benchmark new file mode 100644 index 0000000..10cb8ad --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q96.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q96.benchmark +# description: Run query 96 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=96 +QUERY_NUMBER_PADDED=96 diff --git a/benchmark/tpcds/sf1-delta/q97.benchmark b/benchmark/tpcds/sf1-delta/q97.benchmark new file mode 100644 index 0000000..465959f --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q97.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q97.benchmark +# description: Run query 97 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=97 +QUERY_NUMBER_PADDED=97 diff --git a/benchmark/tpcds/sf1-delta/q98.benchmark b/benchmark/tpcds/sf1-delta/q98.benchmark new file mode 100644 index 0000000..d57b90c --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q98.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q98.benchmark +# description: Run query 98 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=98 +QUERY_NUMBER_PADDED=98 diff --git a/benchmark/tpcds/sf1-delta/q99.benchmark b/benchmark/tpcds/sf1-delta/q99.benchmark new file mode 100644 index 0000000..9a3cf32 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/q99.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/q99.benchmark +# description: Run query 99 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in +QUERY_NUMBER=99 +QUERY_NUMBER_PADDED=99 diff --git a/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in new file mode 100644 index 0000000..57d33d1 --- /dev/null +++ b/benchmark/tpcds/sf1-delta/tpcds_sf1.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [tpcds-sf1] + +name DSQ${QUERY_NUMBER_PADDED} +group tpcds +subgroup sf1 + +require delta + +require parquet + +load benchmark/tpcds/sf1-delta/load.sql + +run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/tpcds/sf1-parquet/load.sql b/benchmark/tpcds/sf1-parquet/load.sql new file mode 100644 index 0000000..23e75d7 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/load.sql @@ -0,0 +1,24 @@ +create view call_center as from parquet_scan('./data/generated/tpcds_sf1/call_center/parquet/**/*.parquet'); +create view catalog_page as from parquet_scan('./data/generated/tpcds_sf1/catalog_page/parquet/**/*.parquet'); +create view catalog_returns as from parquet_scan('./data/generated/tpcds_sf1/catalog_returns/parquet/**/*.parquet'); +create view catalog_sales as from parquet_scan('./data/generated/tpcds_sf1/catalog_sales/parquet/**/*.parquet'); +create view customer as from parquet_scan('./data/generated/tpcds_sf1/customer/parquet/**/*.parquet'); +create view customer_demographics as from parquet_scan('./data/generated/tpcds_sf1/customer_demographics/parquet/**/*.parquet'); +create view customer_address as from parquet_scan('./data/generated/tpcds_sf1/customer_address/parquet/**/*.parquet'); +create view date_dim as from parquet_scan('./data/generated/tpcds_sf1/date_dim/parquet/**/*.parquet'); +create view household_demographics as from 
parquet_scan('./data/generated/tpcds_sf1/household_demographics/parquet/**/*.parquet'); +create view inventory as from parquet_scan('./data/generated/tpcds_sf1/inventory/parquet/**/*.parquet'); +create view income_band as from parquet_scan('./data/generated/tpcds_sf1/income_band/parquet/**/*.parquet'); +create view item as from parquet_scan('./data/generated/tpcds_sf1/item/parquet/**/*.parquet'); +create view promotion as from parquet_scan('./data/generated/tpcds_sf1/promotion/parquet/**/*.parquet'); +create view reason as from parquet_scan('./data/generated/tpcds_sf1/reason/parquet/**/*.parquet'); +create view ship_mode as from parquet_scan('./data/generated/tpcds_sf1/ship_mode/parquet/**/*.parquet'); +create view store as from parquet_scan('./data/generated/tpcds_sf1/store/parquet/**/*.parquet'); +create view store_returns as from parquet_scan('./data/generated/tpcds_sf1/store_returns/parquet/**/*.parquet'); +create view store_sales as from parquet_scan('./data/generated/tpcds_sf1/store_sales/parquet/**/*.parquet'); +create view time_dim as from parquet_scan('./data/generated/tpcds_sf1/time_dim/parquet/**/*.parquet'); +create view warehouse as from parquet_scan('./data/generated/tpcds_sf1/warehouse/parquet/**/*.parquet'); +create view web_page as from parquet_scan('./data/generated/tpcds_sf1/web_page/parquet/**/*.parquet'); +create view web_returns as from parquet_scan('./data/generated/tpcds_sf1/web_returns/parquet/**/*.parquet'); +create view web_sales as from parquet_scan('./data/generated/tpcds_sf1/web_sales/parquet/**/*.parquet'); +create view web_site as from parquet_scan('./data/generated/tpcds_sf1/web_site/parquet/**/*.parquet'); \ No newline at end of file diff --git a/benchmark/tpcds/sf1-parquet/q01.benchmark b/benchmark/tpcds/sf1-parquet/q01.benchmark new file mode 100644 index 0000000..80cfeec --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q01.benchmark +# description: Run query 01 from the 
TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpcds/sf1-parquet/q02.benchmark b/benchmark/tpcds/sf1-parquet/q02.benchmark new file mode 100644 index 0000000..fe7d0fd --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q02.benchmark +# description: Run query 02 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpcds/sf1-parquet/q03.benchmark b/benchmark/tpcds/sf1-parquet/q03.benchmark new file mode 100644 index 0000000..214ed35 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q03.benchmark +# description: Run query 03 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpcds/sf1-parquet/q04.benchmark b/benchmark/tpcds/sf1-parquet/q04.benchmark new file mode 100644 index 0000000..246e1fb --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q04.benchmark +# description: Run query 04 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpcds/sf1-parquet/q05.benchmark b/benchmark/tpcds/sf1-parquet/q05.benchmark new file mode 100644 index 0000000..1ea004b --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q05.benchmark +# description: Run query 05 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpcds/sf1-parquet/q06.benchmark b/benchmark/tpcds/sf1-parquet/q06.benchmark new file 
mode 100644 index 0000000..4f887b2 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q06.benchmark +# description: Run query 06 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpcds/sf1-parquet/q07.benchmark b/benchmark/tpcds/sf1-parquet/q07.benchmark new file mode 100644 index 0000000..1d116c9 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q07.benchmark +# description: Run query 07 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpcds/sf1-parquet/q08.benchmark b/benchmark/tpcds/sf1-parquet/q08.benchmark new file mode 100644 index 0000000..2d0f9cb --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q08.benchmark +# description: Run query 08 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpcds/sf1-parquet/q09.benchmark b/benchmark/tpcds/sf1-parquet/q09.benchmark new file mode 100644 index 0000000..d115826 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q09.benchmark +# description: Run query 09 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpcds/sf1-parquet/q10.benchmark b/benchmark/tpcds/sf1-parquet/q10.benchmark new file mode 100644 index 0000000..f4a048d --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q10.benchmark +# description: Run query 10 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpcds/sf1-parquet/q11.benchmark b/benchmark/tpcds/sf1-parquet/q11.benchmark new file mode 100644 index 0000000..3c1f80a --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q11.benchmark +# description: Run query 11 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpcds/sf1-parquet/q12.benchmark b/benchmark/tpcds/sf1-parquet/q12.benchmark new file mode 100644 index 0000000..4af68b7 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q12.benchmark +# description: Run query 12 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpcds/sf1-parquet/q13.benchmark b/benchmark/tpcds/sf1-parquet/q13.benchmark new file mode 100644 index 0000000..d4066d6 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q13.benchmark +# description: Run query 13 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpcds/sf1-parquet/q14.benchmark b/benchmark/tpcds/sf1-parquet/q14.benchmark new file mode 100644 index 0000000..af8071e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q14.benchmark +# description: Run query 14 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpcds/sf1-parquet/q15.benchmark b/benchmark/tpcds/sf1-parquet/q15.benchmark new file mode 100644 index 0000000..7ac477c --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q15.benchmark +# description: Run query 15 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpcds/sf1-parquet/q16.benchmark b/benchmark/tpcds/sf1-parquet/q16.benchmark new file mode 100644 index 0000000..e89fc35 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q16.benchmark +# description: Run query 16 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpcds/sf1-parquet/q17.benchmark b/benchmark/tpcds/sf1-parquet/q17.benchmark new file mode 100644 index 0000000..78af07a --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q17.benchmark +# description: Run query 17 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpcds/sf1-parquet/q18.benchmark b/benchmark/tpcds/sf1-parquet/q18.benchmark new file mode 100644 index 0000000..c993e81 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q18.benchmark +# description: Run query 18 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpcds/sf1-parquet/q19.benchmark b/benchmark/tpcds/sf1-parquet/q19.benchmark new file mode 100644 index 0000000..8781304 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q19.benchmark +# description: Run query 19 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpcds/sf1-parquet/q20.benchmark b/benchmark/tpcds/sf1-parquet/q20.benchmark new file mode 100644 index 0000000..eabd4f6 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q20.benchmark +# description: Run query 20 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpcds/sf1-parquet/q21.benchmark b/benchmark/tpcds/sf1-parquet/q21.benchmark new file mode 100644 index 0000000..acf7b1c --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q21.benchmark +# description: Run query 21 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpcds/sf1-parquet/q22.benchmark b/benchmark/tpcds/sf1-parquet/q22.benchmark new file mode 100644 index 0000000..1b1b008 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q22.benchmark +# description: Run query 22 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpcds/sf1-parquet/q23.benchmark b/benchmark/tpcds/sf1-parquet/q23.benchmark new file mode 100644 index 0000000..52f434f --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q23.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q23.benchmark +# description: Run query 23 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=23 +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/tpcds/sf1-parquet/q24.benchmark b/benchmark/tpcds/sf1-parquet/q24.benchmark new file mode 100644 index 0000000..17b6627 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q24.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q24.benchmark +# description: Run query 24 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=24 +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/tpcds/sf1-parquet/q25.benchmark b/benchmark/tpcds/sf1-parquet/q25.benchmark new file mode 100644 index 0000000..b349885 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q25.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q25.benchmark +# description: Run query 25 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=25 +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/tpcds/sf1-parquet/q26.benchmark b/benchmark/tpcds/sf1-parquet/q26.benchmark new file mode 100644 index 0000000..4ff955e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q26.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q26.benchmark +# description: Run query 26 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=26 +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/tpcds/sf1-parquet/q27.benchmark b/benchmark/tpcds/sf1-parquet/q27.benchmark new file mode 100644 index 0000000..05a2dae --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q27.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q27.benchmark +# description: Run query 27 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=27 +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/tpcds/sf1-parquet/q28.benchmark b/benchmark/tpcds/sf1-parquet/q28.benchmark new file mode 100644 index 0000000..cbc909e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q28.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q28.benchmark +# description: Run query 28 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=28 +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/tpcds/sf1-parquet/q29.benchmark b/benchmark/tpcds/sf1-parquet/q29.benchmark new file mode 100644 index 0000000..922a7cc --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q29.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q29.benchmark +# description: Run query 29 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=29 +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/tpcds/sf1-parquet/q30.benchmark b/benchmark/tpcds/sf1-parquet/q30.benchmark new file mode 100644 index 0000000..e610adc --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q30.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q30.benchmark +# description: Run query 30 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=30 +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/tpcds/sf1-parquet/q31.benchmark b/benchmark/tpcds/sf1-parquet/q31.benchmark new file mode 100644 index 0000000..d08908e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q31.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q31.benchmark +# description: Run query 31 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=31 +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/tpcds/sf1-parquet/q32.benchmark b/benchmark/tpcds/sf1-parquet/q32.benchmark new file mode 100644 index 0000000..50d6382 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q32.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q32.benchmark +# description: Run query 32 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=32 +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/tpcds/sf1-parquet/q33.benchmark b/benchmark/tpcds/sf1-parquet/q33.benchmark new file mode 100644 index 0000000..b74cc23 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q33.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q33.benchmark +# description: Run query 33 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=33 +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/tpcds/sf1-parquet/q34.benchmark b/benchmark/tpcds/sf1-parquet/q34.benchmark new file mode 100644 index 0000000..10887ab --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q34.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q34.benchmark +# description: Run query 34 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=34 +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/tpcds/sf1-parquet/q35.benchmark b/benchmark/tpcds/sf1-parquet/q35.benchmark new file mode 100644 index 0000000..871d4a5 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q35.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q35.benchmark +# description: Run query 35 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=35 +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/tpcds/sf1-parquet/q36.benchmark b/benchmark/tpcds/sf1-parquet/q36.benchmark new file mode 100644 index 0000000..134c567 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q36.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q36.benchmark +# description: Run query 36 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=36 +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/tpcds/sf1-parquet/q37.benchmark b/benchmark/tpcds/sf1-parquet/q37.benchmark new file mode 100644 index 0000000..5851965 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q37.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q37.benchmark +# description: Run query 37 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=37 +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/tpcds/sf1-parquet/q38.benchmark b/benchmark/tpcds/sf1-parquet/q38.benchmark new file mode 100644 index 0000000..498f842 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q38.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q38.benchmark +# description: Run query 38 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=38 +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/tpcds/sf1-parquet/q39.benchmark b/benchmark/tpcds/sf1-parquet/q39.benchmark new file mode 100644 index 0000000..e7b00bd --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q39.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q39.benchmark +# description: Run query 39 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=39 +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/tpcds/sf1-parquet/q40.benchmark b/benchmark/tpcds/sf1-parquet/q40.benchmark new file mode 100644 index 0000000..e2463d6 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q40.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q40.benchmark +# description: Run query 40 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=40 +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/tpcds/sf1-parquet/q41.benchmark b/benchmark/tpcds/sf1-parquet/q41.benchmark new file mode 100644 index 0000000..6577235 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q41.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q41.benchmark +# description: Run query 41 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=41 +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/tpcds/sf1-parquet/q42.benchmark b/benchmark/tpcds/sf1-parquet/q42.benchmark new file mode 100644 index 0000000..933d648 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q42.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q42.benchmark +# description: Run query 42 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=42 +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/tpcds/sf1-parquet/q43.benchmark b/benchmark/tpcds/sf1-parquet/q43.benchmark new file mode 100644 index 0000000..b60ff92 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q43.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q43.benchmark +# description: Run query 43 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=43 +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/tpcds/sf1-parquet/q44.benchmark b/benchmark/tpcds/sf1-parquet/q44.benchmark new file mode 100644 index 0000000..8baefdb --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q44.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q44.benchmark +# description: Run query 44 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=44 +QUERY_NUMBER_PADDED=44 diff --git a/benchmark/tpcds/sf1-parquet/q45.benchmark b/benchmark/tpcds/sf1-parquet/q45.benchmark new file mode 100644 index 0000000..8e6ecd5 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q45.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q45.benchmark +# description: Run query 45 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=45 +QUERY_NUMBER_PADDED=45 diff --git a/benchmark/tpcds/sf1-parquet/q46.benchmark b/benchmark/tpcds/sf1-parquet/q46.benchmark new file mode 100644 index 0000000..2b266c8 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q46.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q46.benchmark +# description: Run query 46 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=46 +QUERY_NUMBER_PADDED=46 diff --git a/benchmark/tpcds/sf1-parquet/q47.benchmark b/benchmark/tpcds/sf1-parquet/q47.benchmark new file mode 100644 index 0000000..684daef --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q47.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q47.benchmark +# description: Run query 47 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=47 +QUERY_NUMBER_PADDED=47 diff --git a/benchmark/tpcds/sf1-parquet/q48.benchmark b/benchmark/tpcds/sf1-parquet/q48.benchmark new file mode 100644 index 0000000..5452cff --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q48.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q48.benchmark +# description: Run query 48 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=48 +QUERY_NUMBER_PADDED=48 diff --git a/benchmark/tpcds/sf1-parquet/q49.benchmark b/benchmark/tpcds/sf1-parquet/q49.benchmark new file mode 100644 index 0000000..dbbc8bf --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q49.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q49.benchmark +# description: Run query 49 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=49 +QUERY_NUMBER_PADDED=49 diff --git a/benchmark/tpcds/sf1-parquet/q50.benchmark b/benchmark/tpcds/sf1-parquet/q50.benchmark new file mode 100644 index 0000000..37e0ba3 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q50.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q50.benchmark +# description: Run query 50 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=50 +QUERY_NUMBER_PADDED=50 diff --git a/benchmark/tpcds/sf1-parquet/q51.benchmark b/benchmark/tpcds/sf1-parquet/q51.benchmark new file mode 100644 index 0000000..7b8e96d --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q51.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q51.benchmark +# description: Run query 51 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=51 +QUERY_NUMBER_PADDED=51 diff --git a/benchmark/tpcds/sf1-parquet/q52.benchmark b/benchmark/tpcds/sf1-parquet/q52.benchmark new file mode 100644 index 0000000..a4aca4c --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q52.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q52.benchmark +# description: Run query 52 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=52 +QUERY_NUMBER_PADDED=52 diff --git a/benchmark/tpcds/sf1-parquet/q53.benchmark b/benchmark/tpcds/sf1-parquet/q53.benchmark new file mode 100644 index 0000000..80c1f6f --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q53.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q53.benchmark +# description: Run query 53 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=53 +QUERY_NUMBER_PADDED=53 diff --git a/benchmark/tpcds/sf1-parquet/q54.benchmark b/benchmark/tpcds/sf1-parquet/q54.benchmark new file mode 100644 index 0000000..496eb9e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q54.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q54.benchmark +# description: Run query 54 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=54 +QUERY_NUMBER_PADDED=54 diff --git a/benchmark/tpcds/sf1-parquet/q55.benchmark b/benchmark/tpcds/sf1-parquet/q55.benchmark new file mode 100644 index 0000000..1117fac --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q55.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q55.benchmark +# description: Run query 55 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=55 +QUERY_NUMBER_PADDED=55 diff --git a/benchmark/tpcds/sf1-parquet/q56.benchmark b/benchmark/tpcds/sf1-parquet/q56.benchmark new file mode 100644 index 0000000..43c423f --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q56.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q56.benchmark +# description: Run query 56 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=56 +QUERY_NUMBER_PADDED=56 diff --git a/benchmark/tpcds/sf1-parquet/q57.benchmark b/benchmark/tpcds/sf1-parquet/q57.benchmark new file mode 100644 index 0000000..25b36ed --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q57.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q57.benchmark +# description: Run query 57 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=57 +QUERY_NUMBER_PADDED=57 diff --git a/benchmark/tpcds/sf1-parquet/q58.benchmark b/benchmark/tpcds/sf1-parquet/q58.benchmark new file mode 100644 index 0000000..b60ebfc --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q58.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q58.benchmark +# description: Run query 58 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=58 +QUERY_NUMBER_PADDED=58 diff --git a/benchmark/tpcds/sf1-parquet/q59.benchmark b/benchmark/tpcds/sf1-parquet/q59.benchmark new file mode 100644 index 0000000..296ac30 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q59.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q59.benchmark +# description: Run query 59 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=59 +QUERY_NUMBER_PADDED=59 diff --git a/benchmark/tpcds/sf1-parquet/q60.benchmark b/benchmark/tpcds/sf1-parquet/q60.benchmark new file mode 100644 index 0000000..a383742 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q60.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q60.benchmark +# description: Run query 60 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=60 +QUERY_NUMBER_PADDED=60 diff --git a/benchmark/tpcds/sf1-parquet/q61.benchmark b/benchmark/tpcds/sf1-parquet/q61.benchmark new file mode 100644 index 0000000..794f3de --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q61.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q61.benchmark +# description: Run query 61 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=61 +QUERY_NUMBER_PADDED=61 diff --git a/benchmark/tpcds/sf1-parquet/q62.benchmark b/benchmark/tpcds/sf1-parquet/q62.benchmark new file mode 100644 index 0000000..01e52ac --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q62.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q62.benchmark +# description: Run query 62 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=62 +QUERY_NUMBER_PADDED=62 diff --git a/benchmark/tpcds/sf1-parquet/q63.benchmark b/benchmark/tpcds/sf1-parquet/q63.benchmark new file mode 100644 index 0000000..2027bf4 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q63.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q63.benchmark +# description: Run query 63 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=63 +QUERY_NUMBER_PADDED=63 diff --git a/benchmark/tpcds/sf1-parquet/q64.benchmark b/benchmark/tpcds/sf1-parquet/q64.benchmark new file mode 100644 index 0000000..0e7178e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q64.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q64.benchmark +# description: Run query 64 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=64 +QUERY_NUMBER_PADDED=64 diff --git a/benchmark/tpcds/sf1-parquet/q65.benchmark b/benchmark/tpcds/sf1-parquet/q65.benchmark new file mode 100644 index 0000000..b3f6ea2 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q65.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q65.benchmark +# description: Run query 65 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=65 +QUERY_NUMBER_PADDED=65 diff --git a/benchmark/tpcds/sf1-parquet/q66.benchmark b/benchmark/tpcds/sf1-parquet/q66.benchmark new file mode 100644 index 0000000..479dfb5 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q66.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q66.benchmark +# description: Run query 66 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=66 +QUERY_NUMBER_PADDED=66 diff --git a/benchmark/tpcds/sf1-parquet/q67.benchmark b/benchmark/tpcds/sf1-parquet/q67.benchmark new file mode 100644 index 0000000..3971bc0 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q67.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q67.benchmark +# description: Run query 67 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=67 +QUERY_NUMBER_PADDED=67 diff --git a/benchmark/tpcds/sf1-parquet/q68.benchmark b/benchmark/tpcds/sf1-parquet/q68.benchmark new file mode 100644 index 0000000..80b5bd3 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q68.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q68.benchmark +# description: Run query 68 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=68 +QUERY_NUMBER_PADDED=68 diff --git a/benchmark/tpcds/sf1-parquet/q69.benchmark b/benchmark/tpcds/sf1-parquet/q69.benchmark new file mode 100644 index 0000000..20cbc5b --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q69.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q69.benchmark +# description: Run query 69 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=69 +QUERY_NUMBER_PADDED=69 diff --git a/benchmark/tpcds/sf1-parquet/q70.benchmark b/benchmark/tpcds/sf1-parquet/q70.benchmark new file mode 100644 index 0000000..64590ae --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q70.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q70.benchmark +# description: Run query 70 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=70 +QUERY_NUMBER_PADDED=70 diff --git a/benchmark/tpcds/sf1-parquet/q71.benchmark b/benchmark/tpcds/sf1-parquet/q71.benchmark new file mode 100644 index 0000000..bfb61bf --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q71.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q71.benchmark +# description: Run query 71 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=71 +QUERY_NUMBER_PADDED=71 diff --git a/benchmark/tpcds/sf1-parquet/q72.benchmark b/benchmark/tpcds/sf1-parquet/q72.benchmark new file mode 100644 index 0000000..8cab000 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q72.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q72.benchmark +# description: Run query 72 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=72 +QUERY_NUMBER_PADDED=72 diff --git a/benchmark/tpcds/sf1-parquet/q73.benchmark b/benchmark/tpcds/sf1-parquet/q73.benchmark new file mode 100644 index 0000000..23a8ea4 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q73.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q73.benchmark +# description: Run query 73 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=73 +QUERY_NUMBER_PADDED=73 diff --git a/benchmark/tpcds/sf1-parquet/q74.benchmark b/benchmark/tpcds/sf1-parquet/q74.benchmark new file mode 100644 index 0000000..883c44d --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q74.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q74.benchmark +# description: Run query 74 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=74 +QUERY_NUMBER_PADDED=74 diff --git a/benchmark/tpcds/sf1-parquet/q75.benchmark b/benchmark/tpcds/sf1-parquet/q75.benchmark new file mode 100644 index 0000000..6ab4a04 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q75.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q75.benchmark +# description: Run query 75 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=75 +QUERY_NUMBER_PADDED=75 diff --git a/benchmark/tpcds/sf1-parquet/q76.benchmark b/benchmark/tpcds/sf1-parquet/q76.benchmark new file mode 100644 index 0000000..ddb1f4e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q76.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q76.benchmark +# description: Run query 76 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=76 +QUERY_NUMBER_PADDED=76 diff --git a/benchmark/tpcds/sf1-parquet/q77.benchmark b/benchmark/tpcds/sf1-parquet/q77.benchmark new file mode 100644 index 0000000..f69818e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q77.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q77.benchmark +# description: Run query 77 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=77 +QUERY_NUMBER_PADDED=77 diff --git a/benchmark/tpcds/sf1-parquet/q78.benchmark b/benchmark/tpcds/sf1-parquet/q78.benchmark new file mode 100644 index 0000000..9156c52 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q78.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q78.benchmark +# description: Run query 78 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=78 +QUERY_NUMBER_PADDED=78 diff --git a/benchmark/tpcds/sf1-parquet/q79.benchmark b/benchmark/tpcds/sf1-parquet/q79.benchmark new file mode 100644 index 0000000..ae54bba --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q79.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q79.benchmark +# description: Run query 79 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=79 +QUERY_NUMBER_PADDED=79 diff --git a/benchmark/tpcds/sf1-parquet/q80.benchmark b/benchmark/tpcds/sf1-parquet/q80.benchmark new file mode 100644 index 0000000..282c8d9 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q80.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q80.benchmark +# description: Run query 80 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=80 +QUERY_NUMBER_PADDED=80 diff --git a/benchmark/tpcds/sf1-parquet/q81.benchmark b/benchmark/tpcds/sf1-parquet/q81.benchmark new file mode 100644 index 0000000..b33e8e7 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q81.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q81.benchmark +# description: Run query 81 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=81 +QUERY_NUMBER_PADDED=81 diff --git a/benchmark/tpcds/sf1-parquet/q82.benchmark b/benchmark/tpcds/sf1-parquet/q82.benchmark new file mode 100644 index 0000000..b74d67d --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q82.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q82.benchmark +# description: Run query 82 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=82 +QUERY_NUMBER_PADDED=82 diff --git a/benchmark/tpcds/sf1-parquet/q83.benchmark b/benchmark/tpcds/sf1-parquet/q83.benchmark new file mode 100644 index 0000000..79a4221 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q83.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q83.benchmark +# description: Run query 83 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=83 +QUERY_NUMBER_PADDED=83 diff --git a/benchmark/tpcds/sf1-parquet/q84.benchmark b/benchmark/tpcds/sf1-parquet/q84.benchmark new file mode 100644 index 0000000..61b4ba0 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q84.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q84.benchmark +# description: Run query 84 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=84 +QUERY_NUMBER_PADDED=84 diff --git a/benchmark/tpcds/sf1-parquet/q85.benchmark b/benchmark/tpcds/sf1-parquet/q85.benchmark new file mode 100644 index 0000000..19458e3 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q85.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q85.benchmark +# description: Run query 85 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=85 +QUERY_NUMBER_PADDED=85 diff --git a/benchmark/tpcds/sf1-parquet/q86.benchmark b/benchmark/tpcds/sf1-parquet/q86.benchmark new file mode 100644 index 0000000..6b687fc --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q86.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q86.benchmark +# description: Run query 86 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=86 +QUERY_NUMBER_PADDED=86 diff --git a/benchmark/tpcds/sf1-parquet/q87.benchmark b/benchmark/tpcds/sf1-parquet/q87.benchmark new file mode 100644 index 0000000..d1c4a02 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q87.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q87.benchmark +# description: Run query 87 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=87 +QUERY_NUMBER_PADDED=87 diff --git a/benchmark/tpcds/sf1-parquet/q88.benchmark b/benchmark/tpcds/sf1-parquet/q88.benchmark new file mode 100644 index 0000000..758597f --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q88.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q88.benchmark +# description: Run query 88 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=88 +QUERY_NUMBER_PADDED=88 diff --git a/benchmark/tpcds/sf1-parquet/q89.benchmark b/benchmark/tpcds/sf1-parquet/q89.benchmark new file mode 100644 index 0000000..626a3f0 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q89.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q89.benchmark +# description: Run query 89 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=89 +QUERY_NUMBER_PADDED=89 diff --git a/benchmark/tpcds/sf1-parquet/q90.benchmark b/benchmark/tpcds/sf1-parquet/q90.benchmark new file mode 100644 index 0000000..f88d96d --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q90.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q90.benchmark +# description: Run query 90 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=90 +QUERY_NUMBER_PADDED=90 diff --git a/benchmark/tpcds/sf1-parquet/q91.benchmark b/benchmark/tpcds/sf1-parquet/q91.benchmark new file mode 100644 index 0000000..85d5ef8 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q91.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q91.benchmark +# description: Run query 91 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=91 +QUERY_NUMBER_PADDED=91 diff --git a/benchmark/tpcds/sf1-parquet/q92.benchmark b/benchmark/tpcds/sf1-parquet/q92.benchmark new file mode 100644 index 0000000..880af0b --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q92.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q92.benchmark +# description: Run query 92 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=92 +QUERY_NUMBER_PADDED=92 diff --git a/benchmark/tpcds/sf1-parquet/q93.benchmark b/benchmark/tpcds/sf1-parquet/q93.benchmark new file mode 100644 index 0000000..1b5c5f6 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q93.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q93.benchmark +# description: Run query 93 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=93 +QUERY_NUMBER_PADDED=93 diff --git a/benchmark/tpcds/sf1-parquet/q94.benchmark b/benchmark/tpcds/sf1-parquet/q94.benchmark new file mode 100644 index 0000000..46886f2 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q94.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q94.benchmark +# description: Run query 94 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=94 +QUERY_NUMBER_PADDED=94 diff --git a/benchmark/tpcds/sf1-parquet/q95.benchmark b/benchmark/tpcds/sf1-parquet/q95.benchmark new file mode 100644 index 0000000..35b283e --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q95.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q95.benchmark +# description: Run query 95 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=95 +QUERY_NUMBER_PADDED=95 diff --git a/benchmark/tpcds/sf1-parquet/q96.benchmark b/benchmark/tpcds/sf1-parquet/q96.benchmark new file mode 100644 index 0000000..2955000 --- 
/dev/null +++ b/benchmark/tpcds/sf1-parquet/q96.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q96.benchmark +# description: Run query 96 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=96 +QUERY_NUMBER_PADDED=96 diff --git a/benchmark/tpcds/sf1-parquet/q97.benchmark b/benchmark/tpcds/sf1-parquet/q97.benchmark new file mode 100644 index 0000000..1d2aa64 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q97.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q97.benchmark +# description: Run query 97 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=97 +QUERY_NUMBER_PADDED=97 diff --git a/benchmark/tpcds/sf1-parquet/q98.benchmark b/benchmark/tpcds/sf1-parquet/q98.benchmark new file mode 100644 index 0000000..c36e0e6 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q98.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q98.benchmark +# description: Run query 98 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=98 +QUERY_NUMBER_PADDED=98 diff --git a/benchmark/tpcds/sf1-parquet/q99.benchmark b/benchmark/tpcds/sf1-parquet/q99.benchmark new file mode 100644 index 0000000..2e6d066 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/q99.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/q99.benchmark +# description: Run query 99 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in +QUERY_NUMBER=99 +QUERY_NUMBER_PADDED=99 diff --git a/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in new file mode 100644 index 0000000..37dbc34 --- /dev/null +++ b/benchmark/tpcds/sf1-parquet/tpcds_sf1.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [tpcds-sf1] + +name DSQ${QUERY_NUMBER_PADDED} +group tpcds +subgroup sf1 + 
+require delta + +require parquet + +load benchmark/tpcds/sf1-parquet/load.sql + +run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/tpch/sf1-delta-remote/load.sql b/benchmark/tpch/sf1-delta-remote/load.sql new file mode 100644 index 0000000..a095ffd --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/load.sql @@ -0,0 +1,8 @@ +create view customer as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/delta_lake'); +create view lineitem as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/lineitem/delta_lake'); +create view nation as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/delta_lake'); +create view orders as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/orders/delta_lake'); +create view part as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/delta_lake'); +create view partsupp as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/delta_lake'); +create view region as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/delta_lake'); +create view supplier as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/delta_lake'); \ No newline at end of file diff --git a/benchmark/tpch/sf1-delta-remote/q01.benchmark b/benchmark/tpch/sf1-delta-remote/q01.benchmark new file mode 100644 index 0000000..df280b4 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q01.benchmark +# description: Run query 01 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1-delta-remote/q02.benchmark 
b/benchmark/tpch/sf1-delta-remote/q02.benchmark new file mode 100644 index 0000000..94b9891 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q02.benchmark +# description: Run query 02 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1-delta-remote/q03.benchmark b/benchmark/tpch/sf1-delta-remote/q03.benchmark new file mode 100644 index 0000000..6a63a9a --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q03.benchmark +# description: Run query 03 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1-delta-remote/q04.benchmark b/benchmark/tpch/sf1-delta-remote/q04.benchmark new file mode 100644 index 0000000..7049851 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q04.benchmark +# description: Run query 04 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1-delta-remote/q05.benchmark b/benchmark/tpch/sf1-delta-remote/q05.benchmark new file mode 100644 index 0000000..4f93fb7 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q05.benchmark +# description: Run query 05 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1-delta-remote/q06.benchmark b/benchmark/tpch/sf1-delta-remote/q06.benchmark new file mode 100644 index 0000000..78854bb --- /dev/null +++ 
b/benchmark/tpch/sf1-delta-remote/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q06.benchmark +# description: Run query 06 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1-delta-remote/q07.benchmark b/benchmark/tpch/sf1-delta-remote/q07.benchmark new file mode 100644 index 0000000..9bf2a7b --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q07.benchmark +# description: Run query 07 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1-delta-remote/q08.benchmark b/benchmark/tpch/sf1-delta-remote/q08.benchmark new file mode 100644 index 0000000..a7f1a80 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q08.benchmark +# description: Run query 08 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1-delta-remote/q09.benchmark b/benchmark/tpch/sf1-delta-remote/q09.benchmark new file mode 100644 index 0000000..272167a --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q09.benchmark +# description: Run query 09 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1-delta-remote/q10.benchmark b/benchmark/tpch/sf1-delta-remote/q10.benchmark new file mode 100644 index 0000000..14822e3 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q10.benchmark +# 
description: Run query 10 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1-delta-remote/q11.benchmark b/benchmark/tpch/sf1-delta-remote/q11.benchmark new file mode 100644 index 0000000..df5276e --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q11.benchmark +# description: Run query 11 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1-delta-remote/q12.benchmark b/benchmark/tpch/sf1-delta-remote/q12.benchmark new file mode 100644 index 0000000..fee7641 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q12.benchmark +# description: Run query 12 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1-delta-remote/q13.benchmark b/benchmark/tpch/sf1-delta-remote/q13.benchmark new file mode 100644 index 0000000..2050dd4 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q13.benchmark +# description: Run query 13 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1-delta-remote/q14.benchmark b/benchmark/tpch/sf1-delta-remote/q14.benchmark new file mode 100644 index 0000000..e458993 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q14.benchmark +# description: Run query 14 from the TPC-H benchmark +# group: [sf1-parquet] + +template 
benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1-delta-remote/q15.benchmark b/benchmark/tpch/sf1-delta-remote/q15.benchmark new file mode 100644 index 0000000..fcd0523 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q15.benchmark +# description: Run query 15 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1-delta-remote/q16.benchmark b/benchmark/tpch/sf1-delta-remote/q16.benchmark new file mode 100644 index 0000000..a7524ad --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q16.benchmark +# description: Run query 16 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1-delta-remote/q17.benchmark b/benchmark/tpch/sf1-delta-remote/q17.benchmark new file mode 100644 index 0000000..05c9584 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q17.benchmark +# description: Run query 17 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1-delta-remote/q18.benchmark b/benchmark/tpch/sf1-delta-remote/q18.benchmark new file mode 100644 index 0000000..a645646 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q18.benchmark +# description: Run query 18 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git 
a/benchmark/tpch/sf1-delta-remote/q19.benchmark b/benchmark/tpch/sf1-delta-remote/q19.benchmark new file mode 100644 index 0000000..3bc9baf --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q19.benchmark +# description: Run query 19 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1-delta-remote/q20.benchmark b/benchmark/tpch/sf1-delta-remote/q20.benchmark new file mode 100644 index 0000000..766e1d2 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q20.benchmark +# description: Run query 20 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1-delta-remote/q21.benchmark b/benchmark/tpch/sf1-delta-remote/q21.benchmark new file mode 100644 index 0000000..6d75456 --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q21.benchmark +# description: Run query 21 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1-delta-remote/q22.benchmark b/benchmark/tpch/sf1-delta-remote/q22.benchmark new file mode 100644 index 0000000..c808aad --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q22.benchmark +# description: Run query 22 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in 
b/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in new file mode 100644 index 0000000..c94ee1c --- /dev/null +++ b/benchmark/tpch/sf1-delta-remote/tpch_sf1_delta.benchmark.in @@ -0,0 +1,19 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [sf1] + +name Q${QUERY_NUMBER_PADDED} +group tpch +subgroup sf1 + +require delta + +require parquet + +require httpfs + +load benchmark/tpch/sf1-delta-remote/load.sql + +run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git a/benchmark/tpch/sf1-delta/load.sql b/benchmark/tpch/sf1-delta/load.sql new file mode 100644 index 0000000..5d97a08 --- /dev/null +++ b/benchmark/tpch/sf1-delta/load.sql @@ -0,0 +1,8 @@ +create view customer as from delta_scan('./data/generated/tpch_sf1/customer/delta_lake'); +create view lineitem as from delta_scan('./data/generated/tpch_sf1/lineitem/delta_lake'); +create view nation as from delta_scan('./data/generated/tpch_sf1/nation/delta_lake'); +create view orders as from delta_scan('./data/generated/tpch_sf1/orders/delta_lake'); +create view part as from delta_scan('./data/generated/tpch_sf1/part/delta_lake'); +create view partsupp as from delta_scan('./data/generated/tpch_sf1/partsupp/delta_lake'); +create view region as from delta_scan('./data/generated/tpch_sf1/region/delta_lake'); +create view supplier as from delta_scan('./data/generated/tpch_sf1/supplier/delta_lake'); \ No newline at end of file diff --git a/benchmark/tpch/sf1-delta/q01.benchmark b/benchmark/tpch/sf1-delta/q01.benchmark new file mode 100644 index 0000000..c002271 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q01.benchmark +# description: Run query 01 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git 
a/benchmark/tpch/sf1-delta/q02.benchmark b/benchmark/tpch/sf1-delta/q02.benchmark new file mode 100644 index 0000000..1b29623 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q02.benchmark +# description: Run query 02 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1-delta/q03.benchmark b/benchmark/tpch/sf1-delta/q03.benchmark new file mode 100644 index 0000000..105db6c --- /dev/null +++ b/benchmark/tpch/sf1-delta/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q03.benchmark +# description: Run query 03 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1-delta/q04.benchmark b/benchmark/tpch/sf1-delta/q04.benchmark new file mode 100644 index 0000000..e6adaa9 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q04.benchmark +# description: Run query 04 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1-delta/q05.benchmark b/benchmark/tpch/sf1-delta/q05.benchmark new file mode 100644 index 0000000..c3d58fa --- /dev/null +++ b/benchmark/tpch/sf1-delta/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q05.benchmark +# description: Run query 05 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1-delta/q06.benchmark b/benchmark/tpch/sf1-delta/q06.benchmark new file mode 100644 index 0000000..f0f4edf --- /dev/null +++ b/benchmark/tpch/sf1-delta/q06.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpch/sf1-delta/q06.benchmark +# description: Run query 06 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1-delta/q07.benchmark b/benchmark/tpch/sf1-delta/q07.benchmark new file mode 100644 index 0000000..ef73acb --- /dev/null +++ b/benchmark/tpch/sf1-delta/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q07.benchmark +# description: Run query 07 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1-delta/q08.benchmark b/benchmark/tpch/sf1-delta/q08.benchmark new file mode 100644 index 0000000..ea07628 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q08.benchmark +# description: Run query 08 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1-delta/q09.benchmark b/benchmark/tpch/sf1-delta/q09.benchmark new file mode 100644 index 0000000..d56ce49 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q09.benchmark +# description: Run query 09 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1-delta/q10.benchmark b/benchmark/tpch/sf1-delta/q10.benchmark new file mode 100644 index 0000000..c288da4 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q10.benchmark +# description: Run query 10 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff 
--git a/benchmark/tpch/sf1-delta/q11.benchmark b/benchmark/tpch/sf1-delta/q11.benchmark new file mode 100644 index 0000000..67840f1 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q11.benchmark +# description: Run query 11 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1-delta/q12.benchmark b/benchmark/tpch/sf1-delta/q12.benchmark new file mode 100644 index 0000000..875acad --- /dev/null +++ b/benchmark/tpch/sf1-delta/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q12.benchmark +# description: Run query 12 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1-delta/q13.benchmark b/benchmark/tpch/sf1-delta/q13.benchmark new file mode 100644 index 0000000..cb28472 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q13.benchmark +# description: Run query 13 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1-delta/q14.benchmark b/benchmark/tpch/sf1-delta/q14.benchmark new file mode 100644 index 0000000..8cd85a0 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q14.benchmark +# description: Run query 14 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1-delta/q15.benchmark b/benchmark/tpch/sf1-delta/q15.benchmark new file mode 100644 index 0000000..b18fa75 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q15.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpch/sf1-delta/q15.benchmark +# description: Run query 15 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1-delta/q16.benchmark b/benchmark/tpch/sf1-delta/q16.benchmark new file mode 100644 index 0000000..9fa0790 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q16.benchmark +# description: Run query 16 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1-delta/q17.benchmark b/benchmark/tpch/sf1-delta/q17.benchmark new file mode 100644 index 0000000..fc64954 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q17.benchmark +# description: Run query 17 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1-delta/q18.benchmark b/benchmark/tpch/sf1-delta/q18.benchmark new file mode 100644 index 0000000..13f17a1 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q18.benchmark +# description: Run query 18 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1-delta/q19.benchmark b/benchmark/tpch/sf1-delta/q19.benchmark new file mode 100644 index 0000000..88f1bba --- /dev/null +++ b/benchmark/tpch/sf1-delta/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q19.benchmark +# description: Run query 19 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 
diff --git a/benchmark/tpch/sf1-delta/q20.benchmark b/benchmark/tpch/sf1-delta/q20.benchmark new file mode 100644 index 0000000..3d07a89 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q20.benchmark +# description: Run query 20 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1-delta/q21.benchmark b/benchmark/tpch/sf1-delta/q21.benchmark new file mode 100644 index 0000000..29c0162 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q21.benchmark +# description: Run query 21 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1-delta/q22.benchmark b/benchmark/tpch/sf1-delta/q22.benchmark new file mode 100644 index 0000000..1d9cc53 --- /dev/null +++ b/benchmark/tpch/sf1-delta/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q22.benchmark +# description: Run query 22 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in new file mode 100644 index 0000000..8b8ef40 --- /dev/null +++ b/benchmark/tpch/sf1-delta/tpch_sf1_delta.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [sf1] + +name Q${QUERY_NUMBER_PADDED} +group tpch +subgroup sf1 + +require delta + +require parquet + +load benchmark/tpch/sf1-delta/load.sql + +run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpch/dbgen/answers/sf1/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git 
a/benchmark/tpch/sf1-parquet-remote/load.sql b/benchmark/tpch/sf1-parquet-remote/load.sql new file mode 100644 index 0000000..23c8ed2 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/load.sql @@ -0,0 +1,8 @@ +create view customer as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/parquet/**/*.parquet'); +create view lineitem as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/lineitem/parquet/**/*.parquet'); +create view nation as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/parquet/**/*.parquet'); +create view orders as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/orders/parquet/**/*.parquet'); +create view part as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/parquet/**/*.parquet'); +create view partsupp as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/parquet/**/*.parquet'); +create view region as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/parquet/**/*.parquet'); +create view supplier as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/parquet/**/*.parquet'); \ No newline at end of file diff --git a/benchmark/tpch/sf1-parquet-remote/q01.benchmark b/benchmark/tpch/sf1-parquet-remote/q01.benchmark new file mode 100644 index 0000000..bb83e6a --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q01.benchmark +# description: Run query 01 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1-parquet-remote/q02.benchmark b/benchmark/tpch/sf1-parquet-remote/q02.benchmark new file mode 100644 index 0000000..577415f --- /dev/null +++ 
b/benchmark/tpch/sf1-parquet-remote/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q02.benchmark +# description: Run query 02 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1-parquet-remote/q03.benchmark b/benchmark/tpch/sf1-parquet-remote/q03.benchmark new file mode 100644 index 0000000..665980c --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q03.benchmark +# description: Run query 03 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1-parquet-remote/q04.benchmark b/benchmark/tpch/sf1-parquet-remote/q04.benchmark new file mode 100644 index 0000000..537f15d --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q04.benchmark +# description: Run query 04 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1-parquet-remote/q05.benchmark b/benchmark/tpch/sf1-parquet-remote/q05.benchmark new file mode 100644 index 0000000..616bc5d --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q05.benchmark +# description: Run query 05 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1-parquet-remote/q06.benchmark b/benchmark/tpch/sf1-parquet-remote/q06.benchmark new file mode 100644 index 0000000..8a471a9 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q06.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpch/sf1-delta/q06.benchmark +# description: Run query 06 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1-parquet-remote/q07.benchmark b/benchmark/tpch/sf1-parquet-remote/q07.benchmark new file mode 100644 index 0000000..02f87dd --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q07.benchmark +# description: Run query 07 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1-parquet-remote/q08.benchmark b/benchmark/tpch/sf1-parquet-remote/q08.benchmark new file mode 100644 index 0000000..69a8b4f --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q08.benchmark +# description: Run query 08 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1-parquet-remote/q09.benchmark b/benchmark/tpch/sf1-parquet-remote/q09.benchmark new file mode 100644 index 0000000..bbd0413 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q09.benchmark +# description: Run query 09 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1-parquet-remote/q10.benchmark b/benchmark/tpch/sf1-parquet-remote/q10.benchmark new file mode 100644 index 0000000..8f0564e --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q10.benchmark +# description: Run query 10 from the TPC-H 
benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1-parquet-remote/q11.benchmark b/benchmark/tpch/sf1-parquet-remote/q11.benchmark new file mode 100644 index 0000000..61f7a01 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q11.benchmark +# description: Run query 11 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1-parquet-remote/q12.benchmark b/benchmark/tpch/sf1-parquet-remote/q12.benchmark new file mode 100644 index 0000000..48e2b2c --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q12.benchmark +# description: Run query 12 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1-parquet-remote/q13.benchmark b/benchmark/tpch/sf1-parquet-remote/q13.benchmark new file mode 100644 index 0000000..f5b2ee3 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q13.benchmark +# description: Run query 13 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1-parquet-remote/q14.benchmark b/benchmark/tpch/sf1-parquet-remote/q14.benchmark new file mode 100644 index 0000000..2ddcaa5 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q14.benchmark +# description: Run query 14 from the TPC-H benchmark +# group: [sf1-parquet] + +template 
benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1-parquet-remote/q15.benchmark b/benchmark/tpch/sf1-parquet-remote/q15.benchmark new file mode 100644 index 0000000..f7414fd --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q15.benchmark +# description: Run query 15 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1-parquet-remote/q16.benchmark b/benchmark/tpch/sf1-parquet-remote/q16.benchmark new file mode 100644 index 0000000..4375e61 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q16.benchmark +# description: Run query 16 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1-parquet-remote/q17.benchmark b/benchmark/tpch/sf1-parquet-remote/q17.benchmark new file mode 100644 index 0000000..df38847 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q17.benchmark +# description: Run query 17 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1-parquet-remote/q18.benchmark b/benchmark/tpch/sf1-parquet-remote/q18.benchmark new file mode 100644 index 0000000..d01d822 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q18.benchmark +# description: Run query 18 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=18 
+QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1-parquet-remote/q19.benchmark b/benchmark/tpch/sf1-parquet-remote/q19.benchmark new file mode 100644 index 0000000..7a5a33e --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q19.benchmark +# description: Run query 19 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1-parquet-remote/q20.benchmark b/benchmark/tpch/sf1-parquet-remote/q20.benchmark new file mode 100644 index 0000000..e1ff9d0 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q20.benchmark +# description: Run query 20 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1-parquet-remote/q21.benchmark b/benchmark/tpch/sf1-parquet-remote/q21.benchmark new file mode 100644 index 0000000..af179e8 --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q21.benchmark +# description: Run query 21 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1-parquet-remote/q22.benchmark b/benchmark/tpch/sf1-parquet-remote/q22.benchmark new file mode 100644 index 0000000..527adcf --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1-delta/q22.benchmark +# description: Run query 22 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git 
a/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in new file mode 100644 index 0000000..dbc4e0a --- /dev/null +++ b/benchmark/tpch/sf1-parquet-remote/tpch_sf1_delta.benchmark.in @@ -0,0 +1,19 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [sf1] + +name Q${QUERY_NUMBER_PADDED} +group tpch +subgroup sf1 + +require delta + +require parquet + +require httpfs + +load benchmark/tpch/sf1-parquet-remote/load.sql + +run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git a/scripts/plot.py b/scripts/plot.py new file mode 100644 index 0000000..c5ea201 --- /dev/null +++ b/scripts/plot.py @@ -0,0 +1,27 @@ +import duckdb + +### Parse Query Results +parse_benchmark_result_query = """ +SELECT + parse_filename(name, true) as benchmark, + parse_filename(filename, true) as config, + avg(timing) as timing +FROM + read_csv('benchmark_results/*.csv', filename=1) +GROUP BY + config, + benchmark +ORDER BY + config, + benchmark +""" + +benchmark_results = duckdb.execute(parse_benchmark_result_query).df() + +### Plot graph +import matplotlib.pyplot as plt +import numpy as np + +plt.rcParams["figure.figsize"] = [10, 5] +fig = benchmark_results.pivot(index='benchmark', columns='config', values='timing').plot(kind='bar', title='', ylabel='runtime [s]').get_figure() +fig.savefig('benchmark_results/result.png') \ No newline at end of file From 70aec0c524b5d426b983e190ed45d9fb542aa4af Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 24 Jul 2024 11:02:35 +0200 Subject: [PATCH 36/38] small tweaks to benchmark readme and makefile --- benchmark/README.md | 16 +++++++++++++++- benchmark/benchmark.Makefile | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/benchmark/README.md b/benchmark/README.md index 6ff3801..edc0497 100644 --- a/benchmark/README.md +++ 
b/benchmark/README.md @@ -8,6 +8,11 @@ To run the benchmarks, firstly run the build using: BUILD_BENCHMARK=1 make ``` +Then, make sure that the generated data is created using: +```shell +make generate-data +``` + Then to run a benchmark, use one of the benchmark Makefile targets prefixed with `bench-run-`: ```shell make bench-run-tpch-sf1 @@ -19,9 +24,18 @@ To create a plot from the results run: make plot ``` -## Configurations options +## More options Specific benchmarks can be run from a suite using the `BENCHMARK_PATTERN` variable. For example to compare only Q01 from TPCH SF1, run: ```shell BENCHMARK_PATTERN=q01.benchmark make bench-run-tpch-sf1 +``` + +Also, we can run all local benchmarks using: +```shell +make bench-run-all-local +``` +Or all remote benchmarks using +```shell +make bench-run-all-remote ``` \ No newline at end of file diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index 3b88eb7..322d9eb 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -63,3 +63,4 @@ bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet ### bench-run-all-local: bench-run-tpcds-sf1 bench-run-tpch-sf1 +bench-run-all-remote: bench-run-tpch-sf1-remote From 628c5ad73ff3c430c9972526fad05930a56e5af4 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 24 Jul 2024 14:06:52 +0200 Subject: [PATCH 37/38] fix accidentally borking `make` --- Makefile | 6 +++--- benchmark/benchmark.Makefile | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index defa0b6..2ff6867 100644 --- a/Makefile +++ b/Makefile @@ -11,12 +11,12 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat -# Include the Makefile from the benchmark directory -include benchmark/benchmark.Makefile - # 
Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile +# Include the Makefile from the benchmark directory +include benchmark/benchmark.Makefile + # Generate some test data to test with generate-data: python3 -m pip install delta-spark duckdb pandas deltalake pyspark delta diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index 322d9eb..2852b03 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -1,3 +1,5 @@ +.PHONY: bench-output-dir clean_benchmark plot + # Set this flag during building to enable the benchmark runner ifeq (${BUILD_BENCHMARK}, 1) TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1 From cfdba36d098c66fab2cf647aa2f7ac6be86b33db Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 24 Jul 2024 17:31:34 +0200 Subject: [PATCH 38/38] small fix to build_benchmark --- Makefile | 5 +++++ benchmark/benchmark.Makefile | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 2ff6867..add6fe5 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,11 @@ test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/t test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/kernel/tests/data test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat +# Set this flag during building to enable the benchmark runner +ifeq (${BUILD_BENCHMARK}, 1) + TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1 +endif + # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index 2852b03..b3f4202 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -1,12 +1,7 @@ .PHONY: bench-output-dir clean_benchmark plot -# Set this flag during building to enable the benchmark runner -ifeq 
(${BUILD_BENCHMARK}, 1) - TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DBUILD_BENCHMARKS=1 -endif - -ifeq ("${BENCHMARK_PATTERN}", "") - BENCHMARK_PATTERN:=.* +ifeq ("$(BENCHMARK_PATTERN)a", "a") + BENCHMARK_PATTERN:=.* endif bench-output-dir: @@ -27,10 +22,13 @@ plot: # TPCH SF1 on delta table bench-run-tpch-sf1-delta: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-delta.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-delta.csv # TPCH SF1 on parquet files bench-run-tpch-sf1-parquet: bench-output-dir - ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-parquet.csv + ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-parquet.csv +# TPCH SF1 on duckdb file +bench-run-tpch-sf1-duckdb: bench-output-dir + ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-duckdb.csv # COMPARES TPCH SF1 on parquet file vs on delta files bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet @@ -40,10 +38,10 @@ bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet # TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1) bench-run-tpch-sf1-remote-delta: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-delta.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-delta-remote/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-remote-delta.csv # TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1) bench-run-tpch-sf1-remote-parquet: bench-output-dir - 
./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/${BENCHMARK_PATTERN}' &> benchmark_results/tpch-sf1-remote-parquet.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1-parquet-remote/$(BENCHMARK_PATTERN)' &> benchmark_results/tpch-sf1-remote-parquet.csv # COMPARES TPCH SF1 on parquet file vs on delta files bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta @@ -53,10 +51,14 @@ bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1- # TPCDS SF1 on delta table bench-run-tpcds-sf1-delta: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-delta.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-delta/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-delta.csv # TPCDS SF1 on parquet files bench-run-tpcds-sf1-parquet: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/${BENCHMARK_PATTERN}' &> benchmark_results/tpcds-sf1-parquet.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1-parquet/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-parquet.csv +# TPCDS SF1 on duckdb files +bench-run-tpcds-sf1-duckdb: bench-output-dir + ./build/release/benchmark/benchmark_runner 'benchmark/tpcds/sf1/$(BENCHMARK_PATTERN)' &> benchmark_results/tpcds-sf1-duckdb.csv + # COMPARES TPCDS SF1 on parquet file vs on delta files bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet