Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for abfss://<container>@<storage account>.<endpoint>/path URL syntax #72

Merged
merged 3 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 2277 files
18 changes: 4 additions & 14 deletions src/azure_filesystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,21 +170,11 @@ shared_ptr<AzureContextState> AzureStorageFileSystem::GetOrCreateStorageContext(
auto context_key = GetContextPrefix() + parsed_url.storage_account_name;

auto &registered_state = client_context->registered_state;
auto storage_account_it = registered_state.find(context_key);
if (storage_account_it == registered_state.end()) {

result = registered_state->Get<AzureContextState>(context_key);
if (!result || !result->IsValid()) {
result = CreateStorageContext(opener, path, parsed_url);
registered_state.insert(std::make_pair(context_key, result));
} else {
auto *azure_context_state = static_cast<AzureContextState *>(storage_account_it->second.get());
// We keep the context valid until the QueryEnd (cf: AzureBlobContextState#QueryEnd())
// we do so because between queries the user can change the secret/variable that has been set
// the side effect of that is that we will reconnect (potentially retrieve a new token) on each request
if (!azure_context_state->IsValid()) {
result = CreateStorageContext(opener, path, parsed_url);
registered_state[context_key] = result;
} else {
result = shared_ptr<AzureContextState>(storage_account_it->second, azure_context_state);
}
registered_state->Insert(context_key, result);
}
} else {
result = CreateStorageContext(opener, path, parsed_url);
Expand Down
10 changes: 1 addition & 9 deletions src/azure_http_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,7 @@ void AzureHTTPState::Reset() {
}

shared_ptr<AzureHTTPState> AzureHTTPState::TryGetState(ClientContext &context) {
auto lookup = context.registered_state.find("azure_http_state");

if (lookup != context.registered_state.end()) {
return shared_ptr_cast<ClientContextState, AzureHTTPState>(lookup->second);
}

auto http_state = make_shared_ptr<AzureHTTPState>();
context.registered_state["azure_http_state"] = http_state;
return http_state;
return context.registered_state->GetOrCreate<AzureHTTPState>("azure_http_state");
}

shared_ptr<AzureHTTPState> AzureHTTPState::TryGetState(optional_ptr<FileOpener> opener) {
Expand Down
46 changes: 31 additions & 15 deletions src/azure_parsed_url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ namespace duckdb {
AzureParsedUrl ParseUrl(const std::string &url) {
constexpr auto invalid_url_format =
"The URL %s does not match the expected formats: (azure|az)://<container>/[<path>] or the fully qualified one: "
"(azure|az)://<storage account>.<endpoint>/<container>/[<path>]";
"(abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
"or abfss://<container>@<storage account>.<endpoint>/[<path>]";
bool is_fully_qualified;
std::string container, storage_account_name, endpoint, prefix, path;

Expand All @@ -22,26 +23,41 @@ AzureParsedUrl ParseUrl(const std::string &url) {
// they will be no more changes to path format.
const auto dot_pos = url.find('.', prefix_end_pos);
const auto slash_pos = url.find('/', prefix_end_pos);
const auto at_pos = url.find('@', prefix_end_pos);
if (slash_pos == std::string::npos) {
throw duckdb::IOException(invalid_url_format, url);
}

if (dot_pos != std::string::npos && dot_pos < slash_pos) {
// syntax is (azure|az)://<storage account>.<endpoint>/<container>/[<path>]
const auto container_slash_pos = url.find('/', dot_pos);
if (container_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
const auto path_slash_pos = url.find('/', container_slash_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}

is_fully_qualified = true;
storage_account_name = url.substr(prefix_end_pos, dot_pos - prefix_end_pos);
endpoint = url.substr(dot_pos + 1, container_slash_pos - dot_pos - 1);
container = url.substr(container_slash_pos + 1, path_slash_pos - container_slash_pos - 1);
path = url.substr(path_slash_pos + 1);

if (url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 &&
at_pos != std::string::npos) {
// syntax is abfss://<container>@<storage account>.<endpoint>/[<path>]
const auto path_slash_pos = url.find('/', prefix_end_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}

container = url.substr(prefix_end_pos, at_pos - prefix_end_pos);
storage_account_name = url.substr(at_pos + 1, dot_pos - at_pos - 1);
endpoint = url.substr(dot_pos + 1, path_slash_pos - dot_pos - 1);
path = url.substr(path_slash_pos + 1);
} else {
// syntax is (abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
const auto container_slash_pos = url.find('/', dot_pos);
if (container_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
const auto path_slash_pos = url.find('/', container_slash_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
storage_account_name = url.substr(prefix_end_pos, dot_pos - prefix_end_pos);
endpoint = url.substr(dot_pos + 1, container_slash_pos - dot_pos - 1);
container = url.substr(container_slash_pos + 1, path_slash_pos - container_slash_pos - 1);
path = url.substr(path_slash_pos + 1);
}
} else {
// syntax is (azure|az)://<container>/[<path>]
// Storage account name will be retrieve from the variables or the secret information
Expand Down
6 changes: 6 additions & 0 deletions test/sql/cloud/hierarchical_namespace.test
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ SELECT count(*) FROM 'abfss://${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/test
----
2317

# Check fully qualified name abfss alternative syntax
query I
SELECT count(*) FROM 'abfss://testing-private@${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
----
2317

# Enable http info for the explain analyze statement
statement ok
SET azure_http_stats = true;
Expand Down
Loading