Skip to content

Commit

Permalink
fix: update url parser to include alternative abfss syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
gdubya committed Aug 18, 2024
1 parent 1907766 commit f2c97d8
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 15 deletions.
46 changes: 31 additions & 15 deletions src/azure_parsed_url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ namespace duckdb {
AzureParsedUrl ParseUrl(const std::string &url) {
constexpr auto invalid_url_format =
"The URL %s does not match the expected formats: (azure|az)://<container>/[<path>] or the fully qualified one: "
"(azure|az)://<storage account>.<endpoint>/<container>/[<path>]";
"(abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>] "
"or abfss://<container>@<storage account>.<endpoint>/[<path>]";
bool is_fully_qualified;
std::string container, storage_account_name, endpoint, prefix, path;

Expand All @@ -22,26 +23,41 @@ AzureParsedUrl ParseUrl(const std::string &url) {
// there will be no more changes to path format.
const auto dot_pos = url.find('.', prefix_end_pos);
const auto slash_pos = url.find('/', prefix_end_pos);
const auto at_pos = url.find('@', prefix_end_pos);
if (slash_pos == std::string::npos) {
throw duckdb::IOException(invalid_url_format, url);
}

if (dot_pos != std::string::npos && dot_pos < slash_pos) {
// syntax is (azure|az)://<storage account>.<endpoint>/<container>/[<path>]
const auto container_slash_pos = url.find('/', dot_pos);
if (container_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
const auto path_slash_pos = url.find('/', container_slash_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}

is_fully_qualified = true;
storage_account_name = url.substr(prefix_end_pos, dot_pos - prefix_end_pos);
endpoint = url.substr(dot_pos + 1, container_slash_pos - dot_pos - 1);
container = url.substr(container_slash_pos + 1, path_slash_pos - container_slash_pos - 1);
path = url.substr(path_slash_pos + 1);

if (url.rfind(AzureDfsStorageFileSystem::PATH_PREFIX, 0) == 0 &&
at_pos != std::string::npos) {
// syntax is abfss://<container>@<storage account>.<endpoint>/[<path>]
const auto path_slash_pos = url.find('/', prefix_end_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}

container = url.substr(prefix_end_pos, at_pos - prefix_end_pos);
storage_account_name = url.substr(at_pos + 1, dot_pos - at_pos - 1);
endpoint = url.substr(dot_pos + 1, path_slash_pos - dot_pos - 1);
path = url.substr(path_slash_pos + 1);
} else {
// syntax is (abfss|azure|az)://<storage account>.<endpoint>/<container>/[<path>]
const auto container_slash_pos = url.find('/', dot_pos);
if (container_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
const auto path_slash_pos = url.find('/', container_slash_pos + 1);
if (path_slash_pos == string::npos) {
throw IOException(invalid_url_format, url);
}
storage_account_name = url.substr(prefix_end_pos, dot_pos - prefix_end_pos);
endpoint = url.substr(dot_pos + 1, container_slash_pos - dot_pos - 1);
container = url.substr(container_slash_pos + 1, path_slash_pos - container_slash_pos - 1);
path = url.substr(path_slash_pos + 1);
}
} else {
// syntax is (azure|az)://<container>/[<path>]
// Storage account name will be retrieved from the variables or the secret information
Expand Down
6 changes: 6 additions & 0 deletions test/sql/cloud/hierarchical_namespace.test
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ SELECT count(*) FROM 'abfss://${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/test
----
2317

# Check fully qualified name abfss alternative syntax
query I
SELECT count(*) FROM 'abfss://testing-private@${AZURE_STORAGE_ACCOUNT}.dfs.core.windows.net/partitioned/l_receipmonth=*/l_shipmode=TRUCK/*.csv';
----
2317

# Enable http info for the explain analyze statement
statement ok
SET azure_http_stats = true;
Expand Down

0 comments on commit f2c97d8

Please sign in to comment.