Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Jan 13, 2025
1 parent dd9a180 commit bea1a83
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 198 deletions.
112 changes: 0 additions & 112 deletions crates/polars-io/src/cloud/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,6 @@ impl CloudOptions {
use super::credential_provider::IntoCredentialProvider;

let verbose = polars_core::config::verbose();
let mut storage_account: Option<polars_utils::pl_str::PlSmallStr> = None;

// The credential provider `self.credentials` is prioritized if it is set. We also need
// `from_env()` as it may source environment configured storage account name.
Expand All @@ -412,9 +411,6 @@ impl CloudOptions {
panic!("impl error: cloud type mismatch")
};
for (key, value) in options.iter() {
if key == &AzureConfigKey::AccountName {
storage_account = Some(value.into());
}
builder = builder.with_config(*key, value);
}
}
Expand All @@ -432,22 +428,7 @@ impl CloudOptions {
);
}
builder.with_credentials(v.into_azure_provider())
} else if let Some(v) = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path
.map(|x| x.into())
.or(storage_account)
.as_deref()
.and_then(get_azure_storage_account_key)
{
if verbose {
eprintln!("[CloudOptions::build_azure]: Retrieved account key from Azure CLI")
}
builder.with_access_key(v)
} else {
if verbose {
eprintln!(
"[CloudOptions::build_azure]: Could not retrieve account key from Azure CLI"
)
}
builder
};

Expand Down Expand Up @@ -630,99 +611,6 @@ impl CloudOptions {
}
}

/// ```text
/// "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/"
/// ^^^^^^^^^^^^^^^^^
/// ```
#[cfg(feature = "azure")]
fn extract_adls_uri_storage_account(path: &str) -> Option<&str> {
Some(
path.split_once("://")?
.1
.split_once('/')?
.0
.split_once('@')?
.1
.split_once(".dfs.core.windows.net")?
.0,
)
}

/// Attempt to retrieve the storage account key for this account using the Azure CLI.
#[cfg(feature = "azure")]
fn get_azure_storage_account_key(account_name: &str) -> Option<String> {
if polars_core::config::verbose() {
eprintln!(
"get_azure_storage_account_key: storage_account_name: {}",
account_name
);
}

let mut cmd = if cfg!(target_family = "windows") {
// https://github.com/apache/arrow-rs/blob/565c24b8071269b02c3937e34c51eacf0f4cbad6/object_store/src/azure/credential.rs#L877-L894
let mut v = std::process::Command::new("cmd");
v.args([
"/C",
"az",
"storage",
"account",
"keys",
"list",
"--output",
"json",
"--account-name",
account_name,
]);
v
} else {
let mut v = std::process::Command::new("az");
v.args([
"storage",
"account",
"keys",
"list",
"--output",
"json",
"--account-name",
account_name,
]);
v
};

let json_resp = cmd
.output()
.ok()
.filter(|x| x.status.success())
.map(|x| String::from_utf8(x.stdout))?
.ok()?;

// [
// {
// "creationTime": "1970-01-01T00:00:00.000000+00:00",
// "keyName": "key1",
// "permissions": "FULL",
// "value": "..."
// },
// {
// "creationTime": "1970-01-01T00:00:00.000000+00:00",
// "keyName": "key2",
// "permissions": "FULL",
// "value": "..."
// }
// ]

#[derive(Debug, serde::Deserialize)]
struct S {
value: String,
}

let resp: Vec<S> = serde_json::from_str(&json_resp).ok()?;

let access_key = resp.into_iter().next()?.value;

Some(access_key)
}

#[cfg(feature = "cloud")]
#[cfg(test)]
mod tests {
Expand Down
92 changes: 6 additions & 86 deletions py-polars/polars/io/cloud/credential_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ def __init__(
self,
*,
scopes: list[str] | None = None,
storage_account: str | None = None,
tenant_id: str | None = None,
_verbose: bool = False,
) -> None:
Expand All @@ -169,11 +168,6 @@ def __init__(
----------
scopes
Scopes to pass to `get_token`
storage_account
If specified, an attempt will be made to retrieve the account keys
for this account using the Azure CLI. If this is successful, the
account keys will be used instead of
`DefaultAzureCredential.get_token()`
tenant_id
Azure tenant ID.
"""
Expand All @@ -182,7 +176,6 @@ def __init__(

self._check_module_availability()

self.account_name = storage_account
self.tenant_id = tenant_id
# Done like this to bypass mypy, we don't have stubs for azure.identity
self.credential = importlib.import_module("azure.identity").__dict__[
Expand All @@ -197,7 +190,6 @@ def __init__(
print(
(
"CredentialProviderAzure "
f"{self.account_name = } "
f"{self.tenant_id = } "
f"{self.scopes = } "
),
Expand All @@ -206,28 +198,6 @@ def __init__(

def __call__(self) -> CredentialProviderFunctionReturn:
"""Fetch the credentials."""
if self.account_name is not None:
try:
creds = {
"account_key": self._get_azure_storage_account_key_az_cli(
self.account_name
)
}

if self._verbose:
print(
"[CredentialProviderAzure]: Retrieved account key from Azure CLI",
file=sys.stderr,
)
except Exception as e:
if self._verbose:
print(
f"[CredentialProviderAzure]: Could not retrieve account key from Azure CLI: {e}",
file=sys.stderr,
)
else:
return creds, None # type: ignore[return-value]

token = self.credential.get_token(*self.scopes, tenant_id=self.tenant_id)

return {
Expand All @@ -240,51 +210,6 @@ def _check_module_availability(cls) -> None:
msg = "azure-identity must be installed to use `CredentialProviderAzure`"
raise ImportError(msg)

@staticmethod
def _extract_adls_uri_storage_account(uri: str) -> str | None:
# "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/"
# ^^^^^^^^^^^^^^^^^
try:
return (
uri.split("://", 1)[1]
.split("/", 1)[0]
.split("@", 1)[1]
.split(".dfs.core.windows.net", 1)[0]
)

except IndexError:
return None

@classmethod
def _get_azure_storage_account_key_az_cli(cls, account_name: str) -> str:
# [
# {
# "creationTime": "1970-01-01T00:00:00.000000+00:00",
# "keyName": "key1",
# "permissions": "FULL",
# "value": "..."
# },
# {
# "creationTime": "1970-01-01T00:00:00.000000+00:00",
# "keyName": "key2",
# "permissions": "FULL",
# "value": "..."
# }
# ]

return json.loads(
cls._azcli(
"storage",
"account",
"keys",
"list",
"--output",
"json",
"--account-name",
account_name,
)
)[0]["value"]

@classmethod
def _azcli_version(cls) -> str | None:
try:
Expand Down Expand Up @@ -423,7 +348,6 @@ def _maybe_init_credential_provider(
# For Azure we dispatch to `azure.identity` as much as possible
if _is_azure_cloud(scheme):
tenant_id = None
storage_account = None

if storage_options is not None:
for k, v in storage_options.items():
Expand All @@ -437,23 +361,19 @@ def _maybe_init_credential_provider(
"authority_id",
}:
tenant_id = v
elif k in {"azure_storage_account_name", "account_name"}:
storage_account = v
elif k in {"azure_use_azure_cli", "use_azure_cli"}:
elif k in {
"azure_storage_account_name",
"account_name",
"azure_use_azure_cli",
"use_azure_cli",
}:
continue
else:
# We assume some sort of access key was given, so we
# just dispatch to the rust side.
return None

storage_account = (
# Prefer the one embedded in the path
CredentialProviderAzure._extract_adls_uri_storage_account(str(path))
or storage_account
)

provider = CredentialProviderAzure(
storage_account=storage_account,
tenant_id=tenant_id,
_verbose=verbose,
)
Expand Down

0 comments on commit bea1a83

Please sign in to comment.