Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] SnowflakeDatasource update #10005

Merged
merged 28 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
947d74d
Require `database` & `schema`
Kilo59 Jun 5, 2024
fe40816
use `schema` when creating `TableAsset`
Kilo59 Jun 5, 2024
d39a82a
[FEATURE] Add new `ConfigUri` type (#10000)
Kilo59 Jun 4, 2024
9353f15
Restrict substitutable sections for Snowflake.connection_string
Kilo59 Jun 5, 2024
8abc721
revert change
Kilo59 Jun 5, 2024
3f31ec8
fix syntax
Kilo59 Jun 5, 2024
34732f5
fix test merge
Kilo59 Jun 5, 2024
20b583b
revert unneeded changes
Kilo59 Jun 5, 2024
7b4e41e
error on connection_string + kwargs
Kilo59 Jun 5, 2024
7ff0989
linting ignores
Kilo59 Jun 5, 2024
54ea2a6
revert applicatioin query param tests
Kilo59 Jun 5, 2024
6a52bf9
fix bad merge
Kilo59 Jun 5, 2024
886602e
linting fixes
Kilo59 Jun 5, 2024
719bfed
fix cloud table_asset fixture
Kilo59 Jun 5, 2024
20e879e
type ignores
Kilo59 Jun 5, 2024
a63eb8c
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 6, 2024
268d262
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 6, 2024
10e07d9
don't provide `schema` name
Kilo59 Jun 6, 2024
0da2c21
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 6, 2024
999d0d9
catch deprecation warning
Kilo59 Jun 6, 2024
5a430cd
catch GxDatasourceWarning
Kilo59 Jun 6, 2024
226fc91
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 6, 2024
e26410c
fix filter regex
Kilo59 Jun 6, 2024
3628293
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 7, 2024
245e02f
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 7, 2024
e850096
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 7, 2024
cd07e4c
Merge branch 'develop' into f/lak-975/sf-require-fields
Kilo59 Jun 7, 2024
ad6cfee
Update great_expectations/datasource/fluent/interfaces.py
Kilo59 Jun 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions great_expectations/datasource/fluent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Sorter,
BatchMetadata,
GxDatasourceWarning,
GxContextWarning,
TestConnectionError,
)
from great_expectations.datasource.fluent.invalid_datasource import (
Expand Down
145 changes: 140 additions & 5 deletions great_expectations/datasource/fluent/config_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,22 @@

import logging
import warnings
from typing import TYPE_CHECKING, Mapping

from great_expectations.compatibility.pydantic import SecretStr
from typing import (
TYPE_CHECKING,
ClassVar,
Literal,
Mapping,
Optional,
TypedDict,
)

from great_expectations.compatibility.pydantic import AnyUrl, SecretStr, parse_obj_as
from great_expectations.compatibility.typing_extensions import override
from great_expectations.core.config_substitutor import TEMPLATE_STR_REGEX

if TYPE_CHECKING:
from typing_extensions import Self, TypeAlias

from great_expectations.core.config_provider import _ConfigurationProvider
from great_expectations.datasource.fluent import Datasource

Expand Down Expand Up @@ -54,8 +63,15 @@ def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._display()!r})"

@classmethod
def _validate_template_str_format(cls, v):
if TEMPLATE_STR_REGEX.search(v):
def str_contains_config_template(cls, v: str) -> bool:
"""
Returns True if the input string contains a config template string.
"""
return TEMPLATE_STR_REGEX.search(v) is not None

@classmethod
def _validate_template_str_format(cls, v: str) -> str | None:
if cls.str_contains_config_template(v):
return v
raise ValueError(
cls.__name__
Expand All @@ -72,6 +88,125 @@ def __get_validators__(cls):
yield cls.validate


UriParts: TypeAlias = Literal[ # https://docs.pydantic.dev/1.10/usage/types/#url-properties
"scheme", "host", "user", "password", "port", "path", "query", "fragment", "tld"
]


class UriPartsDict(TypedDict, total=False):
scheme: str
user: str | None
password: str | None
ipv4: str | None
ipv6: str | None
domain: str | None
port: str | None
path: str | None
query: str | None
fragment: str | None


class ConfigUri(AnyUrl, ConfigStr): # type: ignore[misc] # Mixin "validate" signature mismatch
"""
Special type that enables great_expectation config variable substitution for the
`user` and `password` section of a URI.

Example:
```
"snowflake://${MY_USER}:${MY_PASSWORD}@account/database/schema/table"
```

Note: this type is meant to used as part of pydantic model.
To use this outside of a model see the pydantic docs below.
https://docs.pydantic.dev/usage/models/#parsing-data-into-a-specified-type
"""

ALLOWED_SUBSTITUTIONS: ClassVar[set[UriParts]] = {"user", "password"}

min_length: int = 1
max_length: int = 2**16

def __init__( # noqa: PLR0913 # for compatibility with AnyUrl
self,
template_str: str,
*,
scheme: str,
user: Optional[str] = None,
password: Optional[str] = None,
host: Optional[str] = None,
tld: Optional[str] = None,
host_type: str = "domain",
port: Optional[str] = None,
path: Optional[str] = None,
query: Optional[str] = None,
fragment: Optional[str] = None,
) -> None:
if template_str: # may have already been set in __new__
self.template_str: str = template_str
Comment on lines +144 to +145
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit confused by this. Under what circumstances would self.template_str not be set by the time we get here?

Copy link
Contributor Author

@Kilo59 Kilo59 Jun 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this confused me too.

When the ConfigUri is created using parse_object_as() or as part of standard pydantic model validation.

ConfigUri.__new__ is called (sometimes without the full URL/template_str). It then builds the full URL. But we need that full url to be set as the template_str attribute.
https://github.com/pydantic/pydantic/blob/8333bd59dd3424811f4e73cdd6e9414fd15bb6d7/pydantic/v1/networks.py#L183-L185

Config.__init__ can be called with 2 ways and we need to account for both.
We do this by overriding AnyUrl.__new__ + ConfigStr.__init__

  1. With URL part keywords, without template_str
  2. With template_str, without URL part keywords

Without this added logic the tests fail.

self._secret_value = template_str # for compatibility with SecretStr
super().__init__(
template_str,
scheme=scheme,
user=user,
password=password,
host=host,
tld=tld,
host_type=host_type,
port=port,
path=path,
query=query,
fragment=fragment,
)

def __new__(cls: type[Self], template_str: Optional[str], **kwargs) -> Self:
"""custom __new__ for compatibility with pydantic.parse_obj_as()"""
built_url = cls.build(**kwargs) if template_str is None else template_str
instance = str.__new__(cls, built_url)
instance.template_str = str(instance)
return instance

@classmethod
@override
def validate_parts(cls, parts: UriPartsDict, validate_port: bool = True) -> UriPartsDict:
"""
Ensure that only the `user` and `password` parts have config template strings.
Also validate that all parts of the URI are valid.
"""
allowed_substitutions = sorted(cls.ALLOWED_SUBSTITUTIONS)

for name, part in parts.items():
if not part:
continue
if (
cls.str_contains_config_template(part) # type: ignore[arg-type] # is str
and name not in cls.ALLOWED_SUBSTITUTIONS
):
raise ValueError( # noqa: TRY003
f"Only {', '.join(allowed_substitutions)} may use config substitution; '{name}'"
" substitution not allowed"
)
return AnyUrl.validate_parts(parts, validate_port)

@override
def get_config_value(self, config_provider: _ConfigurationProvider) -> AnyUrl:
"""
Resolve the config template string to its string value according to the passed
_ConfigurationProvider.
Parse the resolved URI string into an `AnyUrl` object.
"""
LOGGER.info(f"Substituting '{self}'")
raw_value = config_provider.substitute_config(self.template_str)
return parse_obj_as(AnyUrl, raw_value)

@classmethod
def __get_validators__(cls):
# one or more validators may be yielded which will be called in the
# order to validate the input, each validator will receive as an input
# the value returned from the previous validator
yield ConfigStr._validate_template_str_format
yield cls.validate # equivalent to AnyUrl.validate


def _check_config_substitutions_needed(
datasource: Datasource,
options: Mapping,
Expand Down
8 changes: 8 additions & 0 deletions great_expectations/datasource/fluent/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ class GxDatasourceWarning(UserWarning):
"""


class GxContextWarning(GxDatasourceWarning):
"""
Warning related to a Datasource that with a missing context.
Kilo59 marked this conversation as resolved.
Show resolved Hide resolved
Usually because the Datasource was created directly rather than using a
`context.sources` factory method.
"""


class GxSerializationWarning(GxDatasourceWarning):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@
},
{
"type": "string",
"writeOnly": true,
"format": "password"
"minLength": 1,
"maxLength": 65536,
"format": "uri"
Comment on lines +54 to +56
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to be a duplicate of the following item?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One represents the ConfigUri type and the other a standard SnowflakeDsn connection string.
I'll update this in a followup.
Either to distinguish these two items in the generated Schema, or remove the duplicate.

Copy link
Contributor Author

@Kilo59 Kilo59 Jun 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

},
{
"type": "string",
Expand Down Expand Up @@ -538,7 +539,7 @@
},
"ConnectionDetails": {
"title": "ConnectionDetails",
"description": "Information needed to connect to a Snowflake database.\nAlternative to a connection string.",
"description": "Information needed to connect to a Snowflake database.\nAlternative to a connection string.\n\nhttps://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#additional-connection-parameters",
"type": "object",
"properties": {
"account": {
Expand All @@ -564,10 +565,12 @@
},
"database": {
"title": "Database",
"description": "`database` that the Datasource is mapped to.",
"type": "string"
},
"schema": {
"title": "Schema",
"description": "`schema` that the Datasource is mapped to.",
"type": "string"
},
"warehouse": {
Expand All @@ -587,7 +590,9 @@
"required": [
"account",
"user",
"password"
"password",
"database",
"schema"
],
"additionalProperties": false
}
Expand Down
Loading