diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml new file mode 100644 index 0000000..f75a37d --- /dev/null +++ b/.github/workflows/CloudTesting.yml @@ -0,0 +1,80 @@ +name: Cloud functional tests +on: [push, repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +jobs: + azure-tests-linux: + name: Azure tests (Linux) + runs-on: ubuntu-latest + env: + VCPKG_TARGET_TRIPLET: x64-linux + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + GEN: Ninja + DUCKDB_PLATFORM: linux_amd64 + + steps: + - name: Install required ubuntu packages + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq software-properties-common + sudo add-apt-repository ppa:git-core/ppa + sudo apt-get update -y -qq + sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases + with: + key: ${{ github.job }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build extension + env: + GEN: ninja + run: | + make release + + - name: Test with Service Principal (SPN) in env vars + env: + AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} + AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} + AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + run: | + python3 duckdb/scripts/run_tests_one_by_one.py 
./build/release/test/unittest "*test/sql/cloud/*" + + - name: Test with SPN logged in in azure-cli + env: + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + DUCKDB_AZ_CLI_LOGGED_IN: 1 + run: | + az login --service-principal -u ${{secrets.AZURE_CLIENT_ID}} -p ${{secrets.AZURE_CLIENT_SECRET}} --tenant ${{secrets.AZURE_TENANT_ID}} + python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*" + + - name: Log out azure-cli + if: always() + run: | + az logout + + - name: Tests that focus on public non-authenticated requests + env: + AZURE_STORAGE_ACCOUNT: ${{secrets.AZURE_STORAGE_ACCOUNT}} + DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE: 1 + run: | + python3 duckdb/scripts/run_tests_one_by_one.py ./build/release/test/unittest "*test/sql/cloud/*" \ No newline at end of file diff --git a/.github/workflows/GeneratedTests.yml b/.github/workflows/GeneratedTests.yml deleted file mode 100644 index bd106a5..0000000 --- a/.github/workflows/GeneratedTests.yml +++ /dev/null @@ -1,53 +0,0 @@ -# -# This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension -# -name: GeneratedTests -on: - push: - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true - -jobs: - generated-tests-linux: - name: Generated Tests (Linux) - runs-on: ubuntu-latest - env: - GEN: ninja - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' - - - name: Install - shell: bash - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - with: - key: ${{ github.job }} - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - 
- name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 - - - name: Build - shell: bash - run: make generate-data - - - name: Test - shell: bash - run: | - GENERATED_DATA_AVAILABLE=1 make test \ No newline at end of file diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml new file mode 100644 index 0000000..ecdc23c --- /dev/null +++ b/.github/workflows/LocalTesting.yml @@ -0,0 +1,117 @@ +name: Local functional tests +on: [push, pull_request,repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +jobs: + azurite-tests-linux: + name: Azurite (local azure test server) tests (Linux) + runs-on: ubuntu-latest + container: 'quay.io/pypa/manylinux2014_x86_64' + env: + VCPKG_TARGET_TRIPLET: 'x64-linux' + GEN: Ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + AZURE_STORAGE_ACCOUNT: devstoreaccount1 + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: install Azure test service + run: | + yum install -y nodejs npm + npm install -g azurite + echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo + yum install -y azure-cli + + - name: Setup ManyLinux2014 + run: | + 
./duckdb/scripts/setup_manylinux2014.sh general aws-cli ccache ssh python_alias openssl + + - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Handle OpenSSL dependency for rust build + run: | + echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV + echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV + echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV + + # Build extension + - name: Build extension + env: + GEN: ninja + run: | + make release + + - name: Launch & populate Azure test service + run: | + azurite > azurite_log.txt 2>&1 & + sleep 10 + ./scripts/upload_test_files_to_azurite.sh + + - name: Test extension + run: | + make test + + - name: Azure test server log + if: always() + shell: bash + run: | + echo "## azurite" + cat azurite_log.txt + + generated-tests-linux: + name: Generated Tests (Linux) + runs-on: ubuntu-latest + env: + GEN: ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Install + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Build + shell: bash + run: make generate-data + + - name: Test + shell: bash + run: | + GENERATED_DATA_AVAILABLE=1 make test \ No newline at end of file diff --git a/.gitignore 
b/.gitignore index 2cf38b5..31bc287 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ testext test/python/__pycache__/ .Rhistory data/generated +__azurite*__.json +__blobstorage__ +.venv +.vscode \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index a55d71e..cd15846 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,4 +5,4 @@ [submodule "extension-ci-tools"] path = extension-ci-tools url = git@github.com:duckdb/extension-ci-tools.git - branch = main + branch = main \ No newline at end of file diff --git a/duckdb b/duckdb index 1f98600..7b8efd3 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc +Subproject commit 7b8efd3d0fab38ec9dae467861a317af3f1d7f3e diff --git a/extension-ci-tools b/extension-ci-tools index c0cc931..71b8a60 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit c0cc9319492bfa38344c2f28bd35f2304c74cdde +Subproject commit 71b8a603ea24b1ac8a2cff134aca28163576548f diff --git a/extension_config.cmake b/extension_config.cmake index 46e7a27..16571c2 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -9,6 +9,13 @@ duckdb_extension_load(delta # Build the httpfs extension to test with s3/http duckdb_extension_load(httpfs) +# Build the azure extension to test with azure +duckdb_extension_load(azure + LOAD_TESTS + GIT_URL https://github.com/duckdb/duckdb_azure + GIT_TAG 49b63dc8cd166952a0a34dfd54e6cfe5b823e05e +) + # Build the tpch and tpcds extension for testing/benchmarking duckdb_extension_load(tpch) duckdb_extension_load(tpcds) diff --git a/scripts/upload_test_files_to_azurite.sh b/scripts/upload_test_files_to_azurite.sh new file mode 100755 index 0000000..f3631ba --- /dev/null +++ b/scripts/upload_test_files_to_azurite.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Default Azurite connection string (see: https://github.com/Azure/Azurite) 
+conn_string="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;" + +# Create one private and one publicly readable container +az storage container create -n delta-testing-private --connection-string "${conn_string}" +az storage container create -n delta-testing-public --connection-string "${conn_string}" --public-access blob + +copy_file() { + local from="${1}" + local to="${2}" + az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-private" --connection-string "${conn_string}" + az storage blob upload --file "${from}" --name "${to}" --container-name "delta-testing-public" --connection-string "${conn_string}" +} + +cd ./build/release/rust/src/delta_kernel/acceptance/tests/dat/out/reader_tests/generated && +while IFS= read -r filepath; do + remote_filepath=dat/"${filepath#./}" + copy_file "${filepath}" "${remote_filepath}" +done < <(find . 
-type f) \ No newline at end of file diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index ed968a2..3929c57 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -18,6 +18,7 @@ #include #include +#include namespace duckdb { @@ -65,31 +66,86 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); } +string ParseAccountNameFromEndpoint(const string& endpoint) { + if (!StringUtil::StartsWith(endpoint, "https://")) { + return ""; + } + auto result = endpoint.find('.', 8); + if (result == endpoint.npos) { + return ""; + } + return endpoint.substr(8,result-8); +} + +string parseFromConnectionString(const string& connectionString, const string& key) { + std::regex pattern(key + "=([^;]+)(?=;|$)"); + std::smatch matches; + if (std::regex_search(connectionString, matches, pattern) && matches.size() > 1) { + // matches[0] is the whole "key=value" hit; capture group 1 is the value of the requested key + return matches[1].str(); + } + return ""; +} + static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &path) { ffi::EngineBuilder* builder; // For "regular" paths we early out with the default builder config - if (!StringUtil::StartsWith(path, "s3://")) { + if (!StringUtil::StartsWith(path, "s3://") && !StringUtil::StartsWith(path, "azure://") && !StringUtil::StartsWith(path, "az://") && !StringUtil::StartsWith(path, "abfs://") && !StringUtil::StartsWith(path, "abfss://")) { auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); return KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); } - auto end_of_container = path.find('/',5); + string bucket; + string path_in_bucket; + string secret_type; + + if (StringUtil::StartsWith(path, "s3://")) { + auto end_of_container = path.find('/',5); - if(end_of_container == string::npos) { - 
throw IOException("Invalid s3 url passed to delta scan: %s", path); + if(end_of_container == string::npos) { + throw IOException("Invalid s3 url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + path_in_bucket = path.substr(end_of_container); + secret_type = "s3"; + } else if ((StringUtil::StartsWith(path, "azure://")) || (StringUtil::StartsWith(path, "abfss://"))) { + auto end_of_container = path.find('/',8); + + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(8, end_of_container-8); + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; + } else if (StringUtil::StartsWith(path, "az://")) { + auto end_of_container = path.find('/',5); + + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(5, end_of_container-5); + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; + } else if (StringUtil::StartsWith(path, "abfs://")) { + auto end_of_container = path.find('/',7); + + if(end_of_container == string::npos) { + throw IOException("Invalid azure url passed to delta scan: %s", path); + } + bucket = path.substr(7, end_of_container-7); // "abfs://" is 7 characters, so the container name starts at offset 7 + path_in_bucket = path.substr(end_of_container); + secret_type = "azure"; } - auto bucket = path.substr(5, end_of_container-5); - auto path_in_bucket = path.substr(end_of_container); auto interface_builder_res = ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError); builder = KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path); - // For S3 paths we need to trim the url, set the container, and fetch a potential secret + // For S3 or Azure paths we need to trim the url, set the container, and fetch a potential secret auto &secret_manager = SecretManager::Get(context); auto transaction = 
CatalogTransaction::GetSystemCatalogTransaction(context); - auto secret_match = secret_manager.LookupSecret(transaction, path, "s3"); + auto secret_match = secret_manager.LookupSecret(transaction, path, secret_type); // No secret: nothing left to do here! if (!secret_match.HasMatch()) { @@ -97,26 +153,98 @@ static ffi::EngineBuilder* CreateBuilder(ClientContext &context, const string &p } const auto &kv_secret = dynamic_cast(*secret_match.secret_entry->secret); - auto key_id = kv_secret.TryGetValue("key_id").ToString(); - auto secret = kv_secret.TryGetValue("secret").ToString(); - auto session_token = kv_secret.TryGetValue("session_token").ToString(); - auto region = kv_secret.TryGetValue("region").ToString(); - if (key_id.empty() && secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true")); - } + // Copy the values of the matched secret into builder options; which options are set + // depends on whether secret_type is "s3" or "azure" + if (secret_type == "s3") { + auto key_id = kv_secret.TryGetValue("key_id").ToString(); + auto secret = kv_secret.TryGetValue("secret").ToString(); + auto session_token = kv_secret.TryGetValue("session_token").ToString(); + auto region = kv_secret.TryGetValue("region").ToString(); - if (!key_id.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id)); - } - if (!secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret)); - } - if (!session_token.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token)); - } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); + if (key_id.empty() && secret.empty()) { + ffi::set_builder_option(builder, 
KernelUtils::ToDeltaString("skip_signature"), KernelUtils::ToDeltaString("true")); + } + if (!key_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), KernelUtils::ToDeltaString(key_id)); + } + if (!secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), KernelUtils::ToDeltaString(secret)); + } + if (!session_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), KernelUtils::ToDeltaString(session_token)); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); + + } else if (secret_type == "azure") { + // azure seems to be super complicated as we need to cover duckdb azure plugin and delta RS builder + // and both require different settings + auto connection_string = kv_secret.TryGetValue("connection_string").ToString(); + auto account_name = kv_secret.TryGetValue("account_name").ToString(); + auto endpoint = kv_secret.TryGetValue("endpoint").ToString(); + auto client_id = kv_secret.TryGetValue("client_id").ToString(); + auto client_secret = kv_secret.TryGetValue("client_secret").ToString(); + auto tenant_id = kv_secret.TryGetValue("tenant_id").ToString(); + auto chain = kv_secret.TryGetValue("chain").ToString(); + auto provider = kv_secret.GetProvider(); + + if (provider == "credential_chain") { + // Authentication option 1a: using the cli authentication + if (chain.find("cli") != std::string::npos) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_azure_cli"), KernelUtils::ToDeltaString("true")); + } + // Authentication option 1b: non-cli credential chains will just "hope for the best" technically since we are using the default + // credential chain provider duckDB and delta-kernel-rs should find the same auth + } else if (!connection_string.empty() && connection_string != "NULL") { + + // Authentication option 2: a connection string based on 
account key + auto account_key = parseFromConnectionString(connection_string, "AccountKey"); + account_name = parseFromConnectionString(connection_string, "AccountName"); + // Authentication option 2: a connection string based on account key + if (!account_name.empty() && !account_key.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_key"), + KernelUtils::ToDeltaString(account_key)); + } else { + // Authentication option 2b: a connection string based on SAS token + endpoint = parseFromConnectionString(connection_string, "BlobEndpoint"); + if (account_name.empty()) { + account_name = ParseAccountNameFromEndpoint(endpoint); + } + auto sas_token = parseFromConnectionString(connection_string, "SharedAccessSignature"); + if (!sas_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("sas_token"), + KernelUtils::ToDeltaString(sas_token)); + } + } + } else if (provider == "service_principal") { + if (!client_id.empty() && client_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_id"), KernelUtils::ToDeltaString(client_id)); + } + if (!client_secret.empty() && client_secret != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_client_secret"), KernelUtils::ToDeltaString(client_secret)); + } + if (!tenant_id.empty() && tenant_id != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_tenant_id"), KernelUtils::ToDeltaString(tenant_id)); + } + } else { + // Authentication option 3: no authentication, just an account name + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_skip_signature"), KernelUtils::ToDeltaString("true")); + } + // Set the use_emulator option for when the azurite test server is used + if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), 
KernelUtils::ToDeltaString("true")); + } + if (!account_name.empty() && account_name != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), KernelUtils::ToDeltaString(account_name)); //needed for delta RS builder + } + if (!endpoint.empty() && endpoint != "NULL") { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint)); + } else if (!account_name.empty() && account_name != "NULL") { // only derive the default endpoint from a usable account name + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString("https://" + account_name + ".blob.core.windows.net/")); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); + } return builder; } diff --git a/test/sql/cloud/azure/cli_auth.test b/test/sql/cloud/azure/cli_auth.test new file mode 100644 index 0000000..fffa36a --- /dev/null +++ b/test/sql/cloud/azure/cli_auth.test @@ -0,0 +1,37 @@ +# name: test/sql/cloud/azure/cli_auth.test +# description: confirm queried data is correct +# group: [azure] + +require azure + +require parquet + +require delta + +require-env DUCKDB_AZ_CLI_LOGGED_IN + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +statement ok +CREATE SECRET az1 ( + TYPE AZURE, + PROVIDER CREDENTIAL_CHAIN, + CHAIN 'cli', + ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}' +) + +mode output_result + +# Run a remote DAT test +query I rowsort all_primitive_types +SELECT * +FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta') +---- + +query I rowsort all_primitive_types +SELECT * +FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet') +---- diff --git a/test/sql/cloud/azure/hierarchical_namespace.test b/test/sql/cloud/azure/hierarchical_namespace.test new file mode 100644 index 0000000..470a325 --- /dev/null +++ b/test/sql/cloud/azure/hierarchical_namespace.test @@ -0,0 +1,42 @@ +# name: test/sql/cloud/azure/hierarchical_namespace.test +# 
description: test azure extension with ADLS GEN2 storage +# group: [azure] + +# Require statement will ensure this test is run with this extension loaded +require azure + +require parquet + +require delta + +require-env AZURE_TENANT_ID + +require-env AZURE_CLIENT_ID + +require-env AZURE_CLIENT_SECRET + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +statement ok +CREATE SECRET spn ( + TYPE AZURE, + PROVIDER SERVICE_PRINCIPAL, + TENANT_ID '${AZURE_TENANT_ID}', + CLIENT_ID '${AZURE_CLIENT_ID}', + CLIENT_SECRET '${AZURE_CLIENT_SECRET}', + ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}' +); + +# Run a remote DAT test on abfss +query I +SELECT int32 +FROM delta_scan('abfss://delta-testing-private/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 diff --git a/test/sql/cloud/azure/spn_auth.test b/test/sql/cloud/azure/spn_auth.test new file mode 100644 index 0000000..11ed035 --- /dev/null +++ b/test/sql/cloud/azure/spn_auth.test @@ -0,0 +1,38 @@ +# name: test/sql/cloud/azure/spn_auth.test +# description: test azure extension with service principal authentication +# group: [azure] + +require azure + +require parquet + +require delta + +require-env AZURE_CLIENT_ID + +require-env AZURE_CLIENT_SECRET + +require-env AZURE_TENANT_ID + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +CREATE SECRET spn ( + TYPE AZURE, + PROVIDER SERVICE_PRINCIPAL, + TENANT_ID '${AZURE_TENANT_ID}', + CLIENT_ID '${AZURE_CLIENT_ID}', + CLIENT_SECRET '${AZURE_CLIENT_SECRET}', + ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}' +); + +# Run a remote DAT test +query I rowsort all_primitive_types +SELECT * +FROM delta_scan('azure://delta-testing-private/dat/all_primitive_types/delta') +---- + +query I rowsort all_primitive_types +SELECT * +FROM parquet_scan('azure://delta-testing-private/dat/all_primitive_types/expected/latest/**/*.parquet') +---- diff --git a/test/sql/cloud/azure/unauthenticated.test b/test/sql/cloud/azure/unauthenticated.test new file mode 100644 index 
0000000..84c1f5f --- /dev/null +++ b/test/sql/cloud/azure/unauthenticated.test @@ -0,0 +1,47 @@ +# name: test/sql/cloud/azure/unauthenticated.test +# description: test unauthenticated queries +# group: [azure] + +require azure + +require parquet + +require delta + +require-env DUCKDB_AZURE_PUBLIC_CONTAINER_AVAILABLE + +require-env AZURE_STORAGE_ACCOUNT + +statement ok +set allow_persistent_secrets=false + +# TODO: this doesn't work yet +mode skip + +query I +SELECT int32 +FROM delta_scan('azure://${AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +mode unskip + +# Using a secret to set the account name, we can omit the fully qualified url +statement ok +CREATE SECRET s1 (TYPE AZURE, ACCOUNT_NAME '${AZURE_STORAGE_ACCOUNT}') + +query I +SELECT int32 +FROM delta_scan('azure://delta-testing-public/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + + diff --git a/test/sql/cloud/azurite/azurite.test b/test/sql/cloud/azurite/azurite.test new file mode 100644 index 0000000..169615b --- /dev/null +++ b/test/sql/cloud/azurite/azurite.test @@ -0,0 +1,31 @@ +# name: test/sql/cloud/azurite/azurite.test +# description: test with azurite test server +# group: [azure] + +# Require statement will ensure this test is run with this extension loaded +require azure + +require parquet + +require delta + +require-env AZURE_STORAGE_CONNECTION_STRING + +# Set connection string from env var +statement ok +CREATE SECRET (TYPE AZURE, CONNECTION_STRING '${AZURE_STORAGE_CONNECTION_STRING}'); + +# We need a connection string to do requests +foreach prefix azure:// az:// + +query I +SELECT int32 +FROM delta_scan('${prefix}delta-testing-private/dat/all_primitive_types/delta') +---- +0 +1 +2 +3 +4 + +endloop diff --git a/vcpkg.json b/vcpkg.json index 85936bf..0cefd94 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,8 @@ { "dependencies": [ + "azure-identity-cpp", + "azure-storage-blobs-cpp", + "azure-storage-files-datalake-cpp", "openssl" ] } \ No 
newline at end of file