From 93995b97e373d561f4f772f6643137a2389e2d92 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 20 Feb 2024 22:35:46 +0100 Subject: [PATCH 001/105] allows to decorate async function with dlt.source --- dlt/extract/decorators.py | 70 ++++++++++++++++++++++---------- tests/extract/test_decorators.py | 57 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 21 deletions(-) diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index d86fd04ef4..2835d0aafa 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, Any, + Awaitable, Callable, ClassVar, Iterator, @@ -39,7 +40,6 @@ ) from dlt.extract.hints import make_hints from dlt.extract.utils import ( - ensure_table_schema_columns_hint, simulate_func_call, wrap_compat_transformer, wrap_resource_gen, @@ -186,7 +186,9 @@ def source( "'name' has no effect when `schema` argument is present", source.__name__ ) - def decorator(f: Callable[TSourceFunParams, Any]) -> Callable[TSourceFunParams, TDltSourceImpl]: + def decorator( + f: Callable[TSourceFunParams, Any] + ) -> Callable[TSourceFunParams, Union[Awaitable[TDltSourceImpl], TDltSourceImpl]]: nonlocal schema, name if not callable(f) or isinstance(f, DltResource): @@ -212,9 +214,27 @@ def decorator(f: Callable[TSourceFunParams, Any]) -> Callable[TSourceFunParams, source_sections = (known_sections.SOURCES, source_section, effective_name) conf_f = with_config(f, spec=spec, sections=source_sections) + def _eval_rv(_rv: Any) -> TDltSourceImpl: + """Evaluates return value from the source function or coroutine""" + if _rv is None: + raise SourceDataIsNone(schema.name) + # if generator, consume it immediately + if inspect.isgenerator(_rv): + _rv = list(_rv) + + # convert to source + s = _impl_cls.from_data(schema.clone(update_normalizers=True), source_section, _rv) + # apply hints + if max_table_nesting is not None: + s.max_table_nesting = max_table_nesting + s.schema_contract = schema_contract + # enable root propagation + s.root_key = root_key + return s + @wraps(conf_f) def _wrap(*args: Any, **kwargs: Any) -> TDltSourceImpl: - # make schema available to the source + """Wrap a regular function, injection context must be a part of the wrap""" with Container().injectable_context(SourceSchemaInjectableContext(schema)): # configurations will be accessed in this section in the source proxy = Container()[PipelineContext] @@ -227,29 +247,37 @@ def _wrap(*args: Any, **kwargs: Any) -> TDltSourceImpl: ) ): rv = conf_f(*args, **kwargs) - if rv is None: - raise SourceDataIsNone(schema.name) - # if generator, consume it immediately - if inspect.isgenerator(rv): - rv = list(rv) + return _eval_rv(rv) - # convert to source - s = _impl_cls.from_data(schema.clone(update_normalizers=True), source_section, rv) - # apply hints - if max_table_nesting is not None: - s.max_table_nesting = max_table_nesting - s.schema_contract = schema_contract - # enable root propagation - s.root_key = root_key - return s + @wraps(conf_f) + async def _wrap_coro(*args: Any, **kwargs: Any) -> TDltSourceImpl: + """In case of co-routine we must wrap the whole injection context in awaitable, + there's no easy way to avoid some code duplication + """ + with Container().injectable_context(SourceSchemaInjectableContext(schema)): + # configurations will be accessed in this section in the source + proxy = Container()[PipelineContext] + pipeline_name = None if not proxy.is_active() else proxy.pipeline().pipeline_name + with inject_section( + ConfigSectionContext( 
+                        pipeline_name=pipeline_name,
+                        sections=source_sections,
+                        source_state_key=schema.name,
+                    )
+                ):
+                    rv = await conf_f(*args, **kwargs)
+                    return _eval_rv(rv)
 
         # get spec for wrapped function
         SPEC = get_fun_spec(conf_f)
+        # get correct wrapper
+        wrapper = _wrap_coro if inspect.iscoroutinefunction(f) else _wrap
         # store the source information
-        _SOURCES[_wrap.__qualname__] = SourceInfo(SPEC, _wrap, func_module)
-
-        # the typing is right, but makefun.wraps does not preserve signatures
-        return _wrap
+        _SOURCES[_wrap.__qualname__] = SourceInfo(SPEC, wrapper, func_module)
+        if inspect.iscoroutinefunction(f):
+            return _wrap_coro
+        else:
+            return _wrap
 
     if func is None:
         # we're called with parens.
diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py
index 3c15bf37f5..f1c89a5bc1 100644
--- a/tests/extract/test_decorators.py
+++ b/tests/extract/test_decorators.py
@@ -1,3 +1,4 @@
+import inspect
 import os
 from typing import List, Optional, Dict, Iterator, Any, cast
 
@@ -21,6 +22,7 @@
 from dlt.common.typing import TDataItem
 
 from dlt.cli.source_detection import detect_source_configs
+from dlt.common.utils import custom_environ
 from dlt.extract import DltResource, DltSource
 from dlt.extract.exceptions import (
     DynamicNameNotStandaloneResource,
@@ -850,6 +852,61 @@ def __call__(self, more: int = 1):
         return dlt.resource(["A", "V"] * self.elems * more, name="_list")
 
 
+@pytest.mark.asyncio
+async def test_async_source() -> None:
+    @dlt.source
+    async def source_rv_no_parens(reverse: bool = False):
+        # test if the expected context is present
+        dlt.current.state()
+        dlt.current.source_schema()
+        data = [1, 2, 3]
+        if reverse:
+            data = list(reversed(data))
+        return dlt.resource(data, name="data")
+
+    @dlt.source(name="with_parens")
+    async def source_rv_with_parens(reverse: bool = False):
+        # test if the expected context is present
+        dlt.current.state()
+        dlt.current.source_schema()
+        data = [4, 5, 6]
+        if reverse:
+            data = list(reversed(data))
+        return dlt.resource(data, name="data")
+
+    @dlt.source(name="with_parens")
+    async def source_yield_with_parens(reverse: bool = False):
+        # test if the expected context is present
+        dlt.current.state()
+        dlt.current.source_schema()
+        data = [7, 8, 9]
+        if reverse:
+            data = list(reversed(data))
+        return dlt.resource(data, name="data")
+
+    # create a pipeline so current.state() works
+    dlt.pipeline("async_state_pipeline")
+
+    async def _assert_source(source_coro_f, expected_data) -> None:
+        # test various forms of source decorator, parens, no parens, yield, return
+        source_coro = source_coro_f()
+        assert inspect.iscoroutinefunction(source_coro_f)
+        assert inspect.iscoroutine(source_coro)
+        source = await source_coro
+        assert "data" in source.resources
+        assert list(source) == expected_data
+
+        # make sure the config injection works
+        with custom_environ(
+            {f"SOURCES__{source.section.upper()}__{source.name.upper()}__REVERSE": "True"}
+        ):
+            assert list(await source_coro_f()) == list(reversed(expected_data))
+
+    await _assert_source(source_rv_no_parens, [1, 2, 3])
+    await _assert_source(source_rv_with_parens, [4, 5, 6])
+    await _assert_source(source_yield_with_parens, [7, 8, 9])
+
+
 @pytest.mark.skip("Not implemented")
 def test_class_resource() -> None:
     pass

From 44448784a0e10f78e93fd769f3010e55aa79c7ef Mon Sep 17 00:00:00 2001
From: Marcin Rudolf
Date: Tue, 20 Feb 2024 22:36:07 +0100
Subject: [PATCH 002/105] adds pytest-asyncio and updates pytest to 7.x

---
 poetry.lock | 65 ++++++++++++++++++++++----------------------
 pyproject.toml | 10 ++++----
pytest.ini | 1 - 3 files changed, 33 insertions(+), 43 deletions(-) diff --git a/poetry.lock b/poetry.lock index 046dc13918..7bb6803560 100644 --- a/poetry.lock +++ b/poetry.lock @@ -556,14 +556,6 @@ category = "main" optional = false python-versions = ">=3.7" -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - [[package]] name = "attrs" version = "23.1.0" @@ -4065,24 +4057,37 @@ python-versions = "*" [[package]] name = "pytest" -version = "6.2.5" +version = "7.4.4" description = "pytest: simple powerful testing with Python" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -toml = "*" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.23.5" +description = "Pytest support for asyncio" +category = "dev" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +pytest = ">=7.0.0,<9" [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] [[package]] name = "pytest-cases" @@ -4134,17 +4139,6 @@ pytest = [ {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, ] -[[package]] -name = "pytest-pythonpath" -version = "0.7.4" -description = "pytest plugin for adding to the PYTHONPATH from command line or configs." 
-category = "dev" -optional = false -python-versions = ">=2.6, <4" - -[package.dependencies] -pytest = ">=2.5.2,<7" - [[package]] name = "python-daemon" version = "3.0.1" @@ -5268,7 +5262,7 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "1.1" python-versions = ">=3.8.1,<3.13" -content-hash = "2b5e7aed017ef56aaad21932f9ce9806f47351ac05f76d1fcf62f4e61aff1f6a" +content-hash = "2a79839114197182918d1422bc51f037ff53ffb40325f652285a26eb774919d1" [metadata.files] about-time = [ @@ -5464,9 +5458,6 @@ async-timeout = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, ] -atomicwrites = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] attrs = [ {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, @@ -7914,8 +7905,12 @@ pyreadline3 = [ {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] pytest = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] +pytest-asyncio = [ + {file = "pytest-asyncio-0.23.5.tar.gz", hash = "sha256:3a048872a9c4ba14c3e90cc1aa20cbc2def7d01c7c8db3777ec281ba9c057675"}, + {file = "pytest_asyncio-0.23.5-py3-none-any.whl", hash = "sha256:4e7093259ba018d58ede7d5315131d21923a60f8a6e9ee266ce1589685c89eac"}, ] pytest-cases = [ {file = "pytest-cases-3.6.14.tar.gz", hash = "sha256:7455e6ca57a544c1bfdd8b56ace08c1c1ce4c6572a8aab8f1bd351dc25a10b6b"}, @@ -7933,10 +7928,6 @@ pytest-order = [ {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"}, {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"}, ] -pytest-pythonpath = [ - {file = "pytest-pythonpath-0.7.4.tar.gz", hash = "sha256:64e195b23a8f8c0c631fb16882d9ad6fa4137ed1f2961ddd15d52065cd435db6"}, - {file = "pytest_pythonpath-0.7.4-py3-none-any.whl", hash = "sha256:e73e11dab2f0b83e73229e261242b251f0a369d7f527dbfec068822fd26a6ce5"}, -] python-daemon = [ {file = "python-daemon-3.0.1.tar.gz", hash = "sha256:6c57452372f7eaff40934a1c03ad1826bf5e793558e87fef49131e6464b4dae5"}, {file = "python_daemon-3.0.1-py3-none-any.whl", hash = "sha256:42bb848a3260a027fa71ad47ecd959e471327cb34da5965962edd5926229f341"}, @@ -8966,4 +8957,4 @@ yarl = [ zipp = [ {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, -] \ No newline at end of file +] diff --git a/pyproject.toml b/pyproject.toml index 9a77b15cb7..af77df148c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,17 +111,16 
@@ types-click = "^7.1.8" sqlfluff = "^2.3.2" types-deprecated = "^1.2.9.2" pytest-console-scripts = "^1.4.1" -pytest = "^6.2.4" +pytest = "^7.0.0" mypy = "^1.6.1" flake8 = "^5.0.0" bandit = "^1.7.0" black = "^23.7.0" isort = "^5.12.0" flake8-bugbear = "^22.0.0" -pytest-pythonpath = "^0.7.3" -pytest-order = "^1.0.0" -pytest-cases = "^3.6.9" -pytest-forked = "^1.3.0" +pytest-order = ">=1.0.0" +pytest-cases = ">=3.6.9" +pytest-forked = ">=1.3.0" types-PyYAML = ">=6.0.7" types-cachetools = ">=4.2.9" types-protobuf = ">=3.19.8" @@ -138,6 +137,7 @@ types-psutil = "^5.9.5.16" types-psycopg2 = "^2.9.21.14" cryptography = "^41.0.7" google-api-python-client = ">=1.7.11" +pytest-asyncio = "^0.23.5" [tool.poetry.group.pipeline] optional=true diff --git a/pytest.ini b/pytest.ini index 88c8353a69..6c2e207664 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -python_paths= dlt norecursedirs= .direnv .eggs build dist addopts= -v --showlocals --durations 10 xfail_strict= true From b3b70f627c5a1bc84715bf65784dc9be81cb226e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 20 Feb 2024 22:56:17 +0100 Subject: [PATCH 003/105] fixes forked teardown issue 7.x --- pytest.ini | 1 + tests/common/runtime/test_signals.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/pytest.ini b/pytest.ini index 6c2e207664..81f5451239 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +pythonpath=dlt norecursedirs= .direnv .eggs build dist addopts= -v --showlocals --durations 10 xfail_strict= true diff --git a/tests/common/runtime/test_signals.py b/tests/common/runtime/test_signals.py index 179491de16..af30ea16ce 100644 --- a/tests/common/runtime/test_signals.py +++ b/tests/common/runtime/test_signals.py @@ -121,3 +121,8 @@ def _thread() -> None: assert exc.value.signal_code == 15 p.join() assert thread_signal == 15 + + +def test_cleanup() -> None: + # this must happen after all forked tests (problems with tests teardowns in other tests) + pass From f5d7a0adf1354976dd5327d9103338ba3d5b0a5e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 01:16:48 +0100 Subject: [PATCH 004/105] bumps deps for py 3.12 --- poetry.lock | 8582 ++++++++++++++++++++++++------------------------ pyproject.toml | 11 +- 2 files changed, 4291 insertions(+), 4302 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7bb6803560..62f7a4892e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,18 +1,26 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + [[package]] name = "about-time" version = "4.2.1" description = "Easily measure timing and throughput of code blocks, with beautiful human friendly representations." 
-category = "dev" optional = false python-versions = ">=3.7, <4" +files = [ + {file = "about-time-4.2.1.tar.gz", hash = "sha256:6a538862d33ce67d997429d14998310e1dbfda6cb7d9bbfbf799c4709847fece"}, + {file = "about_time-4.2.1-py3-none-any.whl", hash = "sha256:8bbf4c75fe13cbd3d72f49a03b02c5c7dca32169b6d49117c257e7eb3eaee341"}, +] [[package]] name = "adlfs" version = "2023.8.0" description = "Access Azure Datalake Gen1 with fsspec and dask" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "adlfs-2023.8.0-py3-none-any.whl", hash = "sha256:3eb248a3c2a30b419f1147bd7676d156b5219f96ef7f11d47166afd2a3bdb07e"}, + {file = "adlfs-2023.8.0.tar.gz", hash = "sha256:07e804f6df4593acfcaf01025b162e30ac13e523d3570279c98b2d91a18026d9"}, +] [package.dependencies] aiohttp = ">=3.7.0" @@ -29,9 +37,12 @@ docs = ["furo", "myst-parser", "numpydoc", "sphinx"] name = "agate" version = "1.7.1" description = "A data analysis library that is optimized for humans instead of machines." -category = "main" optional = false python-versions = "*" +files = [ + {file = "agate-1.7.1-py2.py3-none-any.whl", hash = "sha256:23f9f412f74f97b72f82b1525ab235cc816bc8c8525d968a091576a0dbc54a5f"}, + {file = "agate-1.7.1.tar.gz", hash = "sha256:eadf46d980168b8922d5d396d6258eecd5e7dbef7e6f0c0b71e968545ea96389"}, +] [package.dependencies] Babel = ">=2.0" @@ -49,9 +60,12 @@ test = ["PyICU (>=2.4.2)", "coverage (>=3.7.1)", "cssselect (>=0.9.1)", "lxml (> name = "aiobotocore" version = "2.5.2" description = "Async client for aws services using botocore and aiohttp" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, + {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, +] [package.dependencies] aiohttp = ">=3.3.1,<4.0.0" @@ -67,9 +81,97 @@ boto3 = ["boto3 (>=1.26.161,<1.26.162)"] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", 
hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, + {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, + {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, + {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, + {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, + {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, + {file = 
"aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, + {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, + {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, + {file = 
"aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, + {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, + {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, + {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, +] [package.dependencies] aiosignal = ">=1.1.2" @@ -87,9 +189,12 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aioitertools" version = "0.11.0" description = "itertools and builtins for AsyncIO and mixed iterables" -category = "main" optional = true python-versions = ">=3.6" +files = [ + {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, + {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, +] [package.dependencies] typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} @@ -98,9 +203,12 @@ typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] [package.dependencies] frozenlist = ">=1.1.0" @@ -109,9 +217,12 @@ frozenlist = ">=1.1.0" name = "alembic" version = "1.12.0" description = "A database migration tool for SQLAlchemy." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"}, + {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"}, +] [package.dependencies] importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} @@ -127,9 +238,12 @@ tz = ["python-dateutil"] name = "alive-progress" version = "3.1.4" description = "A new kind of Progress Bar, with real-time throughput, ETA, and very cool animations!" 
-category = "dev" optional = false python-versions = ">=3.7, <4" +files = [ + {file = "alive-progress-3.1.4.tar.gz", hash = "sha256:74a95d8d0d42bc99d3a3725dbd06ebb852245f1b64e301a7c375b92b22663f7b"}, + {file = "alive_progress-3.1.4-py3-none-any.whl", hash = "sha256:c80ad87ce9c1054b01135a87fae69ecebbfc2107497ae87cbe6aec7e534903db"}, +] [package.dependencies] about-time = "4.2.1" @@ -139,9 +253,12 @@ grapheme = "0.6.0" name = "annotated-types" version = "0.6.0" description = "Reusable constraint types to use with typing.Annotated" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] [package.dependencies] typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} @@ -150,17 +267,23 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} name = "ansicon" version = "1.89.0" description = "Python wrapper for loading Jason Hood's ANSICON" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "ansicon-1.89.0-py2.py3-none-any.whl", hash = "sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec"}, + {file = "ansicon-1.89.0.tar.gz", hash = "sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1"}, +] [[package]] name = "anyio" version = "4.0.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, + {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, +] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} @@ -176,9 +299,12 @@ trio = ["trio (>=0.22)"] name = "apache-airflow" version = "2.7.2" description = "Programmatically author, schedule and monitor data pipelines" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-2.7.2.tar.gz", hash = "sha256:c6fab3449066867d9a7728f40b6b9e27f1ea68bca39b064a27f5c5ddc3262224"}, + {file = "apache_airflow-2.7.2-py3-none-any.whl", hash = "sha256:1bc2c022bcae24b911e49fafd5fb619b49efba87ed7bc8561a2065810d8fe899"}, +] [package.dependencies] alembic = ">=1.6.3,<2.0" @@ -381,9 +507,12 @@ zendesk = ["apache-airflow-providers-zendesk"] name = "apache-airflow-providers-common-sql" version = "1.7.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-common-sql package" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-providers-common-sql-1.7.1.tar.gz", hash = "sha256:ba37f795d9656a87cf4661edc381b8ecfe930272c59324b59f8a158fd0971aeb"}, + {file = "apache_airflow_providers_common_sql-1.7.1-py3-none-any.whl", hash = "sha256:36da2f51b51a64765b0ed5e6a5fece8eaa3ca173dfbff803e2fe2a0afbb90944"}, +] [package.dependencies] apache-airflow = ">=2.4.0" @@ -397,9 +526,12 @@ pandas = ["pandas (>=0.17.1)"] name = "apache-airflow-providers-ftp" version = "3.5.1" description = "Provider for Apache Airflow. 
Implements apache-airflow-providers-ftp package" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-providers-ftp-3.5.1.tar.gz", hash = "sha256:dc6dc524dc7454857a0812154d7540172e36db3a87e48a4a91918ebf80898bbf"}, + {file = "apache_airflow_providers_ftp-3.5.1-py3-none-any.whl", hash = "sha256:e4ea77d6276355acfe2392c12155db7b9d51be460b7673b616dc1d8bee03c1d7"}, +] [package.dependencies] apache-airflow = ">=2.4.0" @@ -411,9 +543,12 @@ openlineage = ["apache-airflow-providers-openlineage"] name = "apache-airflow-providers-http" version = "4.5.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-http package" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-providers-http-4.5.1.tar.gz", hash = "sha256:ec90920ff980fc264af9811dc72c37ef272bcdb3d007c7114e12366559426460"}, + {file = "apache_airflow_providers_http-4.5.1-py3-none-any.whl", hash = "sha256:702f26938bc22684eefecd297c2b0809793f9e43b8d911d807a29f21e69da179"}, +] [package.dependencies] aiohttp = "*" @@ -426,9 +561,12 @@ requests-toolbelt = "*" name = "apache-airflow-providers-imap" version = "3.3.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-imap package" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-providers-imap-3.3.1.tar.gz", hash = "sha256:40bac2a75e4dfbcd7d397776d90d03938facaf2707acc6cc119a8db684e53f77"}, + {file = "apache_airflow_providers_imap-3.3.1-py3-none-any.whl", hash = "sha256:adb6ef7864a5a8e245fbbd555bb4ef1eecf5b094d6d23ca0edc5f0aded50490d"}, +] [package.dependencies] apache-airflow = ">=2.4.0" @@ -437,9 +575,12 @@ apache-airflow = ">=2.4.0" name = "apache-airflow-providers-sqlite" version = "3.4.3" description = "Provider for Apache Airflow. Implements apache-airflow-providers-sqlite package" -category = "dev" optional = false python-versions = "~=3.8" +files = [ + {file = "apache-airflow-providers-sqlite-3.4.3.tar.gz", hash = "sha256:347d2db03eaa5ea9fef414666565ffa5e849935cbc30e37237edcaa822b5ced8"}, + {file = "apache_airflow_providers_sqlite-3.4.3-py3-none-any.whl", hash = "sha256:4ffa6a50f0ea1b4e51240b657dfec3fb026c87bdfa71af908a56461df6a6f2e0"}, +] [package.dependencies] apache-airflow = ">=2.4.0" @@ -452,9 +593,12 @@ common-sql = ["apache-airflow-providers-common-sql"] name = "apispec" version = "6.3.0" description = "A pluggable API specification generator. Currently supports the OpenAPI Specification (f.k.a. the Swagger specification)." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "apispec-6.3.0-py3-none-any.whl", hash = "sha256:95a0b9355785df998bb0e9b939237a30ee4c7428fd6ef97305eae3da06b9b339"}, + {file = "apispec-6.3.0.tar.gz", hash = "sha256:6cb08d92ce73ff0b3bf46cb2ea5c00d57289b0f279fb0256a3df468182ba5344"}, +] [package.dependencies] packaging = ">=21.3" @@ -473,17 +617,23 @@ yaml = ["PyYAML (>=3.10)"] name = "appdirs" version = "1.4.4" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" optional = false python-versions = "*" +files = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] [[package]] name = "argcomplete" version = "3.1.1" description = "Bash tab completion for argparse" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, + {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, +] [package.extras] test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] @@ -492,9 +642,12 @@ test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] name = "asgiref" version = "3.7.2" description = "ASGI specs, helper code, and adapters" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, + {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, +] [package.dependencies] typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} @@ -506,17 +659,23 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "asn1crypto" version = "1.5.1" description = "Fast ASN.1 parser and serializer with definitions for private keys, public keys, certificates, CRL, OCSP, CMS, PKCS#3, PKCS#7, PKCS#8, PKCS#12, PKCS#5, X.509 and TSP" -category = "main" optional = true python-versions = "*" +files = [ + {file = "asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"}, + {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, +] [[package]] name = "astatine" version = "0.3.3" description = "Some handy helper functions for Python's AST module." 
-category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, + {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, +] [package.dependencies] asttokens = ">=1.1" @@ -526,9 +685,12 @@ domdf-python-tools = ">=2.7.0" name = "asttokens" version = "2.3.0" description = "Annotate AST trees with source code positions" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, + {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, +] [package.dependencies] six = ">=1.12.0" @@ -540,9 +702,12 @@ test = ["astroid", "pytest"] name = "astunparse" version = "1.6.3" description = "An AST unparser for Python" -category = "main" optional = false python-versions = "*" +files = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] [package.dependencies] six = ">=1.6.1,<2.0" @@ -552,17 +717,23 @@ wheel = ">=0.23.0,<1.0" name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] [[package]] name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] [package.extras] cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] @@ -575,9 +746,12 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "authlib" version = "1.2.1" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
-category = "main" optional = true python-versions = "*" +files = [ + {file = "Authlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:c88984ea00149a90e3537c964327da930779afa4564e354edfd98410bea01911"}, + {file = "Authlib-1.2.1.tar.gz", hash = "sha256:421f7c6b468d907ca2d9afede256f068f87e34d23dd221c07d13d4c234726afb"}, +] [package.dependencies] cryptography = ">=3.2" @@ -586,9 +760,12 @@ cryptography = ">=3.2" name = "azure-core" version = "1.29.3" description = "Microsoft Azure Core Library for Python" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "azure-core-1.29.3.tar.gz", hash = "sha256:c92700af982e71c8c73de9f4c20da8b3f03ce2c22d13066e4d416b4629c87903"}, + {file = "azure_core-1.29.3-py3-none-any.whl", hash = "sha256:f8b2910f92b66293d93bd00564924ad20ad48f4a1e150577cf18d1e7d4f9263c"}, +] [package.dependencies] requests = ">=2.18.4" @@ -602,9 +779,12 @@ aio = ["aiohttp (>=3.0)"] name = "azure-datalake-store" version = "0.0.53" description = "Azure Data Lake Store Filesystem Client Library for Python" -category = "main" optional = true python-versions = "*" +files = [ + {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, + {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, +] [package.dependencies] cffi = "*" @@ -615,9 +795,12 @@ requests = ">=2.20.0" name = "azure-identity" version = "1.14.0" description = "Microsoft Azure Identity Library for Python" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "azure-identity-1.14.0.zip", hash = "sha256:72441799f8c5c89bfe21026965e266672a7c5d050c2c65119ef899dd5362e2b1"}, + {file = "azure_identity-1.14.0-py3-none-any.whl", hash = "sha256:edabf0e010eb85760e1dd19424d5e8f97ba2c9caff73a16e7b30ccbdbcce369b"}, +] [package.dependencies] azure-core = ">=1.11.0,<2.0.0" @@ -629,9 +812,12 @@ msal-extensions = ">=0.3.0,<2.0.0" name = "azure-storage-blob" version = "12.17.0" description = "Microsoft Azure Blob Storage Client Library for Python" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "azure-storage-blob-12.17.0.zip", hash = "sha256:c14b785a17050b30fc326a315bdae6bc4a078855f4f94a4c303ad74a48dc8c63"}, + {file = "azure_storage_blob-12.17.0-py3-none-any.whl", hash = "sha256:0016e0c549a80282d7b4920c03f2f4ba35c53e6e3c7dbcd2a4a8c8eb3882c1e7"}, +] [package.dependencies] azure-core = ">=1.28.0,<2.0.0" @@ -646,9 +832,12 @@ aio = ["azure-core[aio] (>=1.28.0,<2.0.0)"] name = "babel" version = "2.12.1" description = "Internationalization utilities" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, + {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, +] [package.dependencies] pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} @@ -657,17 +846,51 @@ pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" -category = "dev" optional = false python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = 
"sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "backports-zoneinfo" +version = "0.2.1" +description = "Backport of the standard library zoneinfo module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, + {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, +] + +[package.extras] +tzdata = ["tzdata"] [[package]] name = "bandit" version = "1.7.5" description = "Security oriented static analyser for python code." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, + {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, +] [package.dependencies] colorama = {version = ">=0.3.9", markers = "platform_system == \"Windows\""} @@ -685,9 +908,12 @@ yaml = ["PyYAML"] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = true python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] [package.dependencies] soupsieve = ">1.2" @@ -700,9 +926,32 @@ lxml = ["lxml"] name = "black" version = "23.9.1" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, + {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, + {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, + {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, + {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, + {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, + {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = 
"sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, + {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, + {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, + {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, + {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, +] [package.dependencies] click = ">=8.0.0" @@ -723,9 +972,12 @@ uvloop = ["uvloop (>=0.15.2)"] name = "blessed" version = "1.20.0" description = "Easy, practical library for making terminal apps, by providing an elegant, well-documented interface to Colors, Keyboard input, and screen Positioning capabilities." -category = "dev" optional = false python-versions = ">=2.7" +files = [ + {file = "blessed-1.20.0-py2.py3-none-any.whl", hash = "sha256:0c542922586a265e699188e52d5f5ac5ec0dd517e5a1041d90d2bbf23f906058"}, + {file = "blessed-1.20.0.tar.gz", hash = "sha256:2cdd67f8746e048f00df47a2880f4d6acbcdb399031b604e34ba8f71d5787680"}, +] [package.dependencies] jinxed = {version = ">=1.1.0", markers = "platform_system == \"Windows\""} @@ -736,17 +988,23 @@ wcwidth = ">=0.1.4" name = "blinker" version = "1.6.2" description = "Fast, simple object-to-object and broadcast signaling" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, + {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, +] [[package]] name = "boto3" version = "1.26.161" description = "The AWS SDK for Python" -category = "main" optional = true python-versions = ">= 3.7" +files = [ + {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, + {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, +] [package.dependencies] botocore = ">=1.29.161,<1.30.0" @@ -760,9 +1018,12 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "boto3-stubs" version = "1.28.40" description = "Type annotations for boto3 1.28.40 generated with mypy-boto3-builder 7.18.2" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "boto3-stubs-1.28.40.tar.gz", hash = "sha256:76079a82f199087319762c931f13506e02129132e80257dab0888d3da7dc11c7"}, + {file = "boto3_stubs-1.28.40-py3-none-any.whl", hash = "sha256:bd1d1cbdcbf18902a090d4a746cdecef2a7ebe31cf9a474bbe407d57eaa79a6a"}, +] [package.dependencies] botocore-stubs = "*" @@ -1137,9 +1398,12 @@ xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] name = "botocore" version = "1.29.161" description = "Low-level, data-driven core of boto 3." 
-category = "main" optional = true python-versions = ">= 3.7" +files = [ + {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, + {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, +] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" @@ -1153,9 +1417,12 @@ crt = ["awscrt (==0.16.9)"] name = "botocore-stubs" version = "1.31.40" description = "Type annotations and code completion for botocore" -category = "main" optional = false python-versions = ">=3.7,<4.0" +files = [ + {file = "botocore_stubs-1.31.40-py3-none-any.whl", hash = "sha256:aab534d7e7949cd543bc9b2fadc1a36712033cb00e6f31e2475eefe8486d19ae"}, + {file = "botocore_stubs-1.31.40.tar.gz", hash = "sha256:2001a253daf4ae2e171e6137b9982a00a7fbfc7a53449a16856dc049e7cd5214"}, +] [package.dependencies] types-awscrt = "*" @@ -1165,25 +1432,34 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} name = "cachelib" version = "0.9.0" description = "A collection of cache libraries in the same API interface." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "cachelib-0.9.0-py3-none-any.whl", hash = "sha256:811ceeb1209d2fe51cd2b62810bd1eccf70feba5c52641532498be5c675493b3"}, + {file = "cachelib-0.9.0.tar.gz", hash = "sha256:38222cc7c1b79a23606de5c2607f4925779e37cdcea1c2ad21b8bae94b5425a5"}, +] [[package]] name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, +] [[package]] name = "cattrs" version = "23.1.2" description = "Composable complex class support for attrs and dataclasses." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, + {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, +] [package.dependencies] attrs = ">=20" @@ -1203,17 +1479,73 @@ ujson = ["ujson (>=5.4.0,<6.0.0)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" - +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + [[package]] name = "cffi" -version = "1.15.1" +version = "1.16.0" description = "Foreign Function Interface for Python calling C code." 
-category = "main" optional = false -python-versions = "*" +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = 
"cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] [package.dependencies] pycparser = "*" @@ -1222,25 +1554,107 @@ pycparser = "*" name = "chardet" version = "5.2.0" description = "Universal encoding detector for Python 3" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] [[package]] name = "charset-normalizer" version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = 
"charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + 
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = 
"charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] [[package]] name = "click" version = "8.1.7" description = 
"Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -1249,9 +1663,12 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "clickclick" version = "20.10.2" description = "Click utility functions" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "clickclick-20.10.2-py2.py3-none-any.whl", hash = "sha256:c8f33e6d9ec83f68416dd2136a7950125bd256ec39ccc9a85c6e280a16be2bb5"}, + {file = "clickclick-20.10.2.tar.gz", hash = "sha256:4efb13e62353e34c5eef7ed6582c4920b418d7dedc86d819e22ee089ba01802c"}, +] [package.dependencies] click = ">=4.0" @@ -1261,17 +1678,23 @@ PyYAML = ">=3.11" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] [[package]] name = "coloredlogs" version = "15.0.1" description = "Colored terminal output for Python's logging module" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] [package.dependencies] humanfriendly = ">=9.1" @@ -1283,9 +1706,12 @@ cron = ["capturer (>=2.4)"] name = "colorlog" version = "4.8.0" description = "Log formatting with colors!" 
-category = "dev" optional = false python-versions = "*" +files = [ + {file = "colorlog-4.8.0-py2.py3-none-any.whl", hash = "sha256:3dd15cb27e8119a24c1a7b5c93f9f3b455855e0f73993b1c25921b2f646f1dcd"}, + {file = "colorlog-4.8.0.tar.gz", hash = "sha256:59b53160c60902c405cdec28d38356e09d40686659048893e026ecbd589516b1"}, +] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -1294,9 +1720,12 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} name = "configupdater" version = "3.1.1" description = "Parser like ConfigParser but for updating configuration files" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "ConfigUpdater-3.1.1-py2.py3-none-any.whl", hash = "sha256:805986dbeba317886c7a8d348b2e34986dc9e3128cd3761ecc35decbd372b286"}, + {file = "ConfigUpdater-3.1.1.tar.gz", hash = "sha256:46f0c74d73efa723776764b43c9739f68052495dd3d734319c1d0eb58511f15b"}, +] [package.extras] testing = ["flake8", "pytest", "pytest-cov", "pytest-virtualenv", "pytest-xdist", "sphinx"] @@ -1305,17 +1734,37 @@ testing = ["flake8", "pytest", "pytest-cov", "pytest-virtualenv", "pytest-xdist" name = "connectorx" version = "0.3.2" description = "" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "connectorx-0.3.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:98274242c64a2831a8b1c86e0fa2c46a557dd8cbcf00c3adcf5a602455fb02d7"}, + {file = "connectorx-0.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2b11ba49efd330a7348bef3ce09c98218eea21d92a12dd75cd8f0ade5c99ffc"}, + {file = "connectorx-0.3.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3f6431a30304271f9137bd7854d2850231041f95164c6b749d9ede4c0d92d10c"}, + {file = "connectorx-0.3.2-cp310-none-win_amd64.whl", hash = "sha256:b370ebe8f44d2049254dd506f17c62322cc2db1b782a57f22cce01ddcdcc8fed"}, + {file = "connectorx-0.3.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:d5277fc936a80da3d1dcf889020e45da3493179070d9be8a47500c7001fab967"}, + {file = "connectorx-0.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cc6c963237c3d3b02f7dcd47e1be9fc6e8b93ef0aeed8694f65c62b3c4688a1"}, + {file = "connectorx-0.3.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:9403902685b3423cba786db01a36f36efef90ae3d429e45b74dadb4ae9e328dc"}, + {file = "connectorx-0.3.2-cp311-none-win_amd64.whl", hash = "sha256:6b5f518194a2cf12d5ad031d488ded4e4678eff3b63551856f2a6f1a83197bb8"}, + {file = "connectorx-0.3.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:a5602ae0531e55c58af8cfca92b8e9454fc1ccd82c801cff8ee0f17c728b4988"}, + {file = "connectorx-0.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c5959bfb4a049bb8ce1f590b5824cd1105460b6552ffec336c4bd740eebd5bd"}, + {file = "connectorx-0.3.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c4387bb27ba3acde0ab6921fdafa3811e09fce0db3d1f1ede8547d9de3aab685"}, + {file = "connectorx-0.3.2-cp38-none-win_amd64.whl", hash = "sha256:4b1920c191be9a372629c31c92d5f71fc63f49f283e5adfc4111169de40427d9"}, + {file = "connectorx-0.3.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4473fc06ac3618c673cea63a7050e721fe536782d5c1b6e433589c37a63de704"}, + {file = "connectorx-0.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4009b16399457340326137a223921a24e3e166b45db4dbf3ef637b9981914dc2"}, + {file = "connectorx-0.3.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:74f5b93535663cf47f9fc3d7964f93e652c07003fa71c38d7a68f42167f54bba"}, + {file = "connectorx-0.3.2-cp39-none-win_amd64.whl", hash = 
"sha256:0b80acca13326856c14ee726b47699011ab1baa10897180240c8783423ca5e8c"}, +] [[package]] name = "connexion" version = "2.14.1" description = "Connexion - API first applications with OpenAPI/Swagger and Flask" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"}, + {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"}, +] [package.dependencies] clickclick = ">=1.2,<21" @@ -1339,9 +1788,11 @@ tests = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14 name = "cron-descriptor" version = "1.4.0" description = "A Python library that converts cron expressions into human readable strings." -category = "main" optional = false python-versions = "*" +files = [ + {file = "cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd"}, +] [package.extras] dev = ["polib"] @@ -1350,9 +1801,12 @@ dev = ["polib"] name = "croniter" version = "1.4.1" description = "croniter provides iteration for datetime object with cron like format" -category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "croniter-1.4.1-py2.py3-none-any.whl", hash = "sha256:9595da48af37ea06ec3a9f899738f1b2c1c13da3c38cea606ef7cd03ea421128"}, + {file = "croniter-1.4.1.tar.gz", hash = "sha256:1a6df60eacec3b7a0aa52a8f2ef251ae3dd2a7c7c8b9874e73e791636d55a361"}, +] [package.dependencies] python-dateutil = "*" @@ -1361,9 +1815,33 @@ python-dateutil = "*" name = "cryptography" version = "41.0.7" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c78451b78313fa81607fa1b3f1ae0a5ddd8014c38a02d9db0616133987b9cdf"}, + {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:928258ba5d6f8ae644e764d0f996d61a8777559f72dfeb2eea7e2fe0ad6e782d"}, + {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a1b41bc97f1ad230a41657d9155113c7521953869ae57ac39ac7f1bb471469a"}, + {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:841df4caa01008bad253bce2a6f7b47f86dc9f08df4b433c404def869f590a15"}, + {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5429ec739a29df2e29e15d082f1d9ad683701f0ec7709ca479b3ff2708dae65a"}, + {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:43f2552a2378b44869fe8827aa19e69512e3245a219104438692385b0ee119d1"}, + {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:af03b32695b24d85a75d40e1ba39ffe7db7ffcb099fe507b39fd41a565f1b157"}, + {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:49f0805fc0b2ac8d4882dd52f4a3b935b210935d500b6b805f321addc8177406"}, + {file = "cryptography-41.0.7-cp37-abi3-win32.whl", hash = "sha256:f983596065a18a2183e7f79ab3fd4c475205b839e02cbc0efbbf9666c4b3083d"}, + {file = "cryptography-41.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:90452ba79b8788fa380dfb587cca692976ef4e757b194b093d845e8d99f612f2"}, + {file = "cryptography-41.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:079b85658ea2f59c4f43b70f8119a52414cdb7be34da5d019a77bf96d473b960"}, + {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b640981bf64a3e978a56167594a0e97db71c89a479da8e175d8bb5be5178c003"}, + {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e3114da6d7f95d2dee7d3f4eec16dacff819740bbab931aff8648cb13c5ff5e7"}, + {file = "cryptography-41.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d5ec85080cce7b0513cfd233914eb8b7bbd0633f1d1703aa28d1dd5a72f678ec"}, + {file = "cryptography-41.0.7-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a698cb1dac82c35fcf8fe3417a3aaba97de16a01ac914b89a0889d364d2f6be"}, + {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:37a138589b12069efb424220bf78eac59ca68b95696fc622b6ccc1c0a197204a"}, + {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:68a2dec79deebc5d26d617bfdf6e8aab065a4f34934b22d3b5010df3ba36612c"}, + {file = "cryptography-41.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:09616eeaef406f99046553b8a40fbf8b1e70795a91885ba4c96a70793de5504a"}, + {file = "cryptography-41.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48a0476626da912a44cc078f9893f292f0b3e4c739caf289268168d8f4702a39"}, + {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c7f3201ec47d5207841402594f1d7950879ef890c0c495052fa62f58283fde1a"}, + {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c5ca78485a255e03c32b513f8c2bc39fedb7f5c5f8535545bdc223a03b24f248"}, + {file = "cryptography-41.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6c391c021ab1f7a82da5d8d0b3cee2f4b2c455ec86c8aebbc84837a631ff309"}, + {file = "cryptography-41.0.7.tar.gz", hash = 
"sha256:13f93ce9bea8016c253b34afc6bd6a75993e5c40672ed5405a9c832f0d4a00bc"}, +] [package.dependencies] cffi = ">=1.12" @@ -1382,9 +1860,12 @@ test-randomorder = ["pytest-randomly"] name = "databind-core" version = "4.4.0" description = "Databind is a library inspired by jackson-databind to de-/serialize Python dataclasses. Compatible with Python 3.7 and newer." -category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" +files = [ + {file = "databind.core-4.4.0-py3-none-any.whl", hash = "sha256:3c8a4d9abc93e158af9931d8cec389ddfc0514e02aec03b397948d243db11881"}, + {file = "databind.core-4.4.0.tar.gz", hash = "sha256:715d485e934c073f819f0250bbfcaf59c1319f83427365bc7cfd4c347f87576d"}, +] [package.dependencies] Deprecated = ">=1.2.12,<2.0.0" @@ -1397,9 +1878,12 @@ typing-extensions = ">=3.10.0" name = "databind-json" version = "4.4.0" description = "De-/serialize Python dataclasses to or from JSON payloads. Compatible with Python 3.7 and newer." -category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" +files = [ + {file = "databind.json-4.4.0-py3-none-any.whl", hash = "sha256:df8874118cfba6fd0e77ec3d41a87e04e26034bd545230cab0db1fe904bf1b09"}, + {file = "databind.json-4.4.0.tar.gz", hash = "sha256:4356afdf0aeefcc053eda0888650c59cc558be2686f08a58324d675ccd023586"}, +] [package.dependencies] "databind.core" = ">=4.4.0,<5.0.0" @@ -1411,9 +1895,12 @@ typing-extensions = ">=3.10.0" name = "databricks-sdk" version = "0.17.0" description = "Databricks SDK for Python (Beta)" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "databricks-sdk-0.17.0.tar.gz", hash = "sha256:0a1baa6783aba9b034b9a017da8d0cf839ec61ae8318792b78bfb3db0374dd9c"}, + {file = "databricks_sdk-0.17.0-py3-none-any.whl", hash = "sha256:ad90e01c7b1a9d60a3de6a35606c79ac982e8972d3ad3ff89c251c24439c8bb9"}, +] [package.dependencies] google-auth = ">=2.0,<3.0" @@ -1427,9 +1914,12 @@ notebook = ["ipython (>=8,<9)", "ipywidgets (>=8,<9)"] name = "databricks-sql-connector" version = "2.9.3" description = "Databricks SQL Connector for Python" -category = "main" optional = true python-versions = ">=3.7.1,<4.0.0" +files = [ + {file = "databricks_sql_connector-2.9.3-py3-none-any.whl", hash = "sha256:e37b5aa8bea22e84a9920e87ad9ba6cafbe656008c180a790baa53b711dd9889"}, + {file = "databricks_sql_connector-2.9.3.tar.gz", hash = "sha256:09a1686de3470091e78640de276053d4e18f8c03ba3627ed45b368f78bf87db9"}, +] [package.dependencies] alembic = ">=1.0.11,<2.0.0" @@ -1454,9 +1944,12 @@ urllib3 = ">=1.0" name = "dbt-athena-community" version = "1.7.1" description = "The athena adapter plugin for dbt (data build tool)" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-athena-community-1.7.1.tar.gz", hash = "sha256:02c7bc461628e2adbfaf9d3f51fbe9a5cb5e06ee2ea8329259758518ceafdc12"}, + {file = "dbt_athena_community-1.7.1-py3-none-any.whl", hash = "sha256:2a376fa128e2bd98cb774fcbf718ebe4fbc9cac7857aa037b9e36bec75448361"}, +] [package.dependencies] boto3 = ">=1.26,<2.0" @@ -1471,9 +1964,12 @@ tenacity = ">=8.2,<9.0" name = "dbt-bigquery" version = "1.7.2" description = "The Bigquery adapter plugin for dbt" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-bigquery-1.7.2.tar.gz", hash = "sha256:27c7f492f65ab5d1d43432a4467a436fc3637e3cb72c5b4ab07ddf7573c43596"}, + {file = "dbt_bigquery-1.7.2-py3-none-any.whl", hash = "sha256:75015755363d9e8b8cebe190d59a5e08375032b37bcfec41ec8753e7dea29f6e"}, +] [package.dependencies] dbt-core = ">=1.7.0,<1.8.0" 
@@ -1486,9 +1982,12 @@ google-cloud-storage = ">=2.4,<3.0" name = "dbt-core" version = "1.7.4" description = "With dbt, data analysts and engineers can build analytics the way engineers build applications." -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "dbt-core-1.7.4.tar.gz", hash = "sha256:769b95949210cb0d1eafdb7be48b01e59984650403f86510fdee65bd0f70f76d"}, + {file = "dbt_core-1.7.4-py3-none-any.whl", hash = "sha256:50050ae44fe9bad63e1b639810ed3629822cdc7a2af0eff6e08461c94c4527c0"}, +] [package.dependencies] agate = ">=1.7.0,<1.8.0" @@ -1519,9 +2018,12 @@ urllib3 = ">=1.0,<2.0" name = "dbt-databricks" version = "1.7.3" description = "The Databricks adapter plugin for dbt" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-databricks-1.7.3.tar.gz", hash = "sha256:045e26240c825342259a59004c2e35e7773b0b6cbb255e6896bd46d3810f9607"}, + {file = "dbt_databricks-1.7.3-py3-none-any.whl", hash = "sha256:7c2b7bd7228a401d8262781749fc496c825fe6050e661e5ab3f1c66343e311cc"}, +] [package.dependencies] databricks-sdk = ">=0.9.0" @@ -1533,9 +2035,12 @@ keyring = ">=23.13.0" name = "dbt-duckdb" version = "1.7.1" description = "The duckdb adapter plugin for dbt (data build tool)" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "dbt-duckdb-1.7.1.tar.gz", hash = "sha256:e59b3e58d7a461988d000892b75ce95245cdf899c847e3a430eb2e9e10e63bb9"}, + {file = "dbt_duckdb-1.7.1-py3-none-any.whl", hash = "sha256:bd75b1a72924b942794d0c3293a1159a01f21ab9d82c9f18b22c253dedad101a"}, +] [package.dependencies] dbt-core = ">=1.7.0,<1.8.0" @@ -1548,17 +2053,37 @@ glue = ["boto3", "mypy-boto3-glue"] name = "dbt-extractor" version = "0.5.1" description = "A tool to analyze and extract information from Jinja used in dbt projects." 
-category = "main" optional = false python-versions = ">=3.6.1" +files = [ + {file = "dbt_extractor-0.5.1-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:3b91e6106b967d908b34f83929d3f50ee2b498876a1be9c055fe060ed728c556"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3614ce9f83ae4cd0dc95f77730034a793a1c090a52dcf698ba1c94050afe3a8b"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ea4edf33035d0a060b1e01c42fb2d99316457d44c954d6ed4eed9f1948664d87"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3b9bf50eb062b4344d9546fe42038996c6e7e7daa10724aa955d64717260e5d"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c0ce901d4ebf0664977e4e1cbf596d4afc6c1339fcc7d2cf67ce3481566a626f"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:cbe338b76e9ffaa18275456e041af56c21bb517f6fbda7a58308138703da0996"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b25fa7a276ab26aa2d70ff6e0cf4cfb1490d7831fb57ee1337c24d2b0333b84"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5651e458be910ff567c0da3ea2eb084fd01884cc88888ac2cf1e240dcddacc2"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62e4f040fd338b652683421ce48e903812e27fd6e7af58b1b70a4e1f9f2c79e3"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91e25ad78f1f4feadd27587ebbcc46ad909cfad843118908f30336d08d8400ca"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cdf9938b36cd098bcdd80f43dc03864da3f69f57d903a9160a32236540d4ddcd"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:475e2c05b17eb4976eff6c8f7635be42bec33f15a74ceb87a40242c94a99cebf"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:100453ba06e169cbdb118234ab3f06f6722a2e0e316089b81c88dea701212abc"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-win32.whl", hash = "sha256:6916aae085fd5f2af069fd6947933e78b742c9e3d2165e1740c2e28ae543309a"}, + {file = "dbt_extractor-0.5.1-cp38-abi3-win_amd64.whl", hash = "sha256:eecc08f3743e802a8ede60c89f7b2bce872acc86120cbc0ae7df229bb8a95083"}, + {file = "dbt_extractor-0.5.1.tar.gz", hash = "sha256:cd5d95576a8dea4190240aaf9936a37fd74b4b7913ca69a3c368fc4472bb7e13"}, +] [[package]] name = "dbt-postgres" version = "1.7.4" description = "The postgres adapter plugin for dbt (data build tool)" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-postgres-1.7.4.tar.gz", hash = "sha256:16185b8de36d1a2052a2e4b85512306ab55085b1ea323a353d0dc3628473208d"}, + {file = "dbt_postgres-1.7.4-py3-none-any.whl", hash = "sha256:d414b070ca5e48925ea9ab12706bbb9e2294f7d4509c28e7af42268596334044"}, +] [package.dependencies] agate = "*" @@ -1569,9 +2094,12 @@ psycopg2-binary = ">=2.8,<3.0" name = "dbt-redshift" version = "1.7.1" description = "The Redshift adapter plugin for dbt" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-redshift-1.7.1.tar.gz", hash = "sha256:6da69a83038d011570d131b85171842d0858a46bca3757419ae193b5724a2119"}, + {file = "dbt_redshift-1.7.1-py3-none-any.whl", hash = 
"sha256:2a48b9424934f5445e4285740ebe512afaa75882138121536ccc21d027ef62f2"}, +] [package.dependencies] agate = "*" @@ -1583,9 +2111,12 @@ redshift-connector = "2.0.915" name = "dbt-semantic-interfaces" version = "0.4.3" description = "The shared semantic layer definitions that dbt-core and MetricFlow use" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "dbt_semantic_interfaces-0.4.3-py3-none-any.whl", hash = "sha256:af6ab8509da81ae5f5f1d5631c9761cccaed8cd5311d4824a8d4168ecd0f2093"}, + {file = "dbt_semantic_interfaces-0.4.3.tar.gz", hash = "sha256:9a46d07ad022a4c48783565a776ebc6f1d19e0412e70c4759bc9d7bba461ea1c"}, +] [package.dependencies] click = ">=7.0,<9.0" @@ -1602,9 +2133,12 @@ typing-extensions = ">=4.4,<5.0" name = "dbt-snowflake" version = "1.7.1" description = "The Snowflake adapter plugin for dbt" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-snowflake-1.7.1.tar.gz", hash = "sha256:842a9e87b9e2d999e3bc27aaa369398a4d02bb3f8bb7447aa6151204d4eb90f0"}, + {file = "dbt_snowflake-1.7.1-py3-none-any.whl", hash = "sha256:32ef8733f67dcf4eb594d1b80852ef0b67e920f25bb8a2953031a3868a8d2b3e"}, +] [package.dependencies] agate = "*" @@ -1615,9 +2149,12 @@ snowflake-connector-python = {version = ">=3.0,<4.0", extras = ["secure-local-st name = "dbt-spark" version = "1.7.1" description = "The Apache Spark adapter plugin for dbt" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "dbt-spark-1.7.1.tar.gz", hash = "sha256:a10e5d1bfdb2ca98e7ae2badd06150e2695d9d4fa18ae2354ed5bd093d77f947"}, + {file = "dbt_spark-1.7.1-py3-none-any.whl", hash = "sha256:99b5002edcdb82058a3b0ad33eb18b91a4bdde887d94855e8bd6f633d78837dc"}, +] [package.dependencies] dbt-core = ">=1.7.0,<1.8.0" @@ -1633,9 +2170,12 @@ session = ["pyspark (>=3.0.0,<4.0.0)"] name = "decopatch" version = "1.4.10" description = "Create decorators easily in python." -category = "dev" optional = false python-versions = "*" +files = [ + {file = "decopatch-1.4.10-py2.py3-none-any.whl", hash = "sha256:e151f7f93de2b1b3fd3f3272dcc7cefd1a69f68ec1c2d8e288ecd9deb36dc5f7"}, + {file = "decopatch-1.4.10.tar.gz", hash = "sha256:957f49c93f4150182c23f8fb51d13bb3213e0f17a79e09c8cca7057598b55720"}, +] [package.dependencies] makefun = ">=1.5.0" @@ -1644,17 +2184,23 @@ makefun = ">=1.5.0" name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] [[package]] name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
-category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] [package.dependencies] wrapt = ">=1.10,<2" @@ -1666,9 +2212,12 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] name = "diff-cover" version = "7.7.0" description = "Run coverage and linting reports on diffs" -category = "dev" optional = false python-versions = ">=3.7.2,<4.0.0" +files = [ + {file = "diff_cover-7.7.0-py3-none-any.whl", hash = "sha256:bf86f32ec999f9a9e79bf24969f7127ea7b4e55c3ef3cd9300feb13188c89736"}, + {file = "diff_cover-7.7.0.tar.gz", hash = "sha256:60614cf7e722cf7fb1bde497afac0b514294e1e26534449622dac4da296123fb"}, +] [package.dependencies] chardet = ">=3.0.0" @@ -1683,9 +2232,12 @@ toml = ["tomli (>=1.2.1)"] name = "dill" version = "0.3.7" description = "serialize all of Python" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, +] [package.extras] graph = ["objgraph (>=1.7.2)"] @@ -1694,9 +2246,12 @@ graph = ["objgraph (>=1.7.2)"] name = "dnspython" version = "2.4.2" description = "DNS toolkit" -category = "dev" optional = false python-versions = ">=3.8,<4.0" +files = [ + {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, + {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, +] [package.extras] dnssec = ["cryptography (>=2.6,<42.0)"] @@ -1710,9 +2265,12 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "docspec" version = "2.2.1" description = "Docspec is a JSON object specification for representing API documentation of programming languages." -category = "dev" optional = false python-versions = ">=3.7,<4.0" +files = [ + {file = "docspec-2.2.1-py3-none-any.whl", hash = "sha256:7538f750095a9688c6980ff9a4e029a823a500f64bd00b6b4bdb27951feb31cb"}, + {file = "docspec-2.2.1.tar.gz", hash = "sha256:4854e77edc0e2de40e785e57e95880f7095a05fe978f8b54cef7a269586e15ff"}, +] [package.dependencies] "databind.core" = ">=4.2.6,<5.0.0" @@ -1723,9 +2281,12 @@ Deprecated = ">=1.2.12,<2.0.0" name = "docspec-python" version = "2.2.1" description = "A parser based on lib2to3 producing docspec data from Python source code." 
-category = "dev" optional = false python-versions = ">=3.7,<4.0" +files = [ + {file = "docspec_python-2.2.1-py3-none-any.whl", hash = "sha256:76ac41d35a8face35b2d766c2e8a416fb8832359785d396f0d53bcb00f178e54"}, + {file = "docspec_python-2.2.1.tar.gz", hash = "sha256:c41b850b4d6f4de30999ea6f82c9cdb9183d9bcba45559ee9173d3dab7281559"}, +] [package.dependencies] black = ">=23.1.0,<24.0.0" @@ -1736,9 +2297,11 @@ docspec = ">=2.2.1,<3.0.0" name = "docstring-parser" version = "0.11" description = "\"Parse Python docstrings in reST, Google and Numpydoc format\"" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "docstring_parser-0.11.tar.gz", hash = "sha256:93b3f8f481c7d24e37c5d9f30293c89e2933fa209421c8abd731dd3ef0715ecb"}, +] [package.extras] test = ["black", "pytest"] @@ -1747,17 +2310,23 @@ test = ["black", "pytest"] name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] [[package]] name = "domdf-python-tools" version = "3.6.1" description = "Helpful functions for Python 🐍 🛠️" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, + {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, +] [package.dependencies] importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.9\""} @@ -1770,19 +2339,70 @@ dates = ["pytz (>=2019.1)"] [[package]] name = "duckdb" -version = "0.9.1" -description = "DuckDB embedded database" -category = "main" +version = "0.10.0" +description = "DuckDB in-process database" optional = false python-versions = ">=3.7.0" +files = [ + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bd0ffb3fddef0f72a150e4d76e10942a84a1a0447d10907df1621b90d6668060"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3d709d5c7c1a12b5e10d0b05fa916c670cd2b50178e3696faa0cc16048a1745"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9114aa22ec5d591a20ce5184be90f49d8e5b5348ceaab21e102c54560d07a5f8"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a37877efadf39caf7cadde0f430fedf762751b9c54750c821e2f1316705a21"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87cbc9e1d9c3fc9f14307bea757f99f15f46843c0ab13a6061354410824ed41f"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0bfec79fed387201550517d325dff4fad2705020bc139d936cab08b9e845662"}, + {file = "duckdb-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5622134d2d9796b15e09de810e450859d4beb46d9b861357ec9ae40a61b775c"}, + {file = "duckdb-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:089ee8e831ccaef1b73fc89c43b661567175eed0115454880bafed5e35cda702"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a05af63747f1d7021995f0811c333dee7316cec3b06c0d3e4741b9bdb678dd21"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:072d6eba5d8a59e0069a8b5b4252fed8a21f9fe3f85a9129d186a39b3d0aea03"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a77b85668f59b919042832e4659538337f1c7f197123076c5311f1c9cf077df7"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96a666f1d2da65d03199a977aec246920920a5ea1da76b70ae02bd4fb1ffc48c"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ec76a4262b783628d26612d184834852d9c92fb203e91af789100c17e3d7173"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009dd9d2cdbd3b061a9efbdfc79f2d1a8377bcf49f1e5f430138621f8c083a6c"}, + {file = "duckdb-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:878f06766088090dad4a2e5ee0081555242b2e8dcb29415ecc97e388cf0cf8d8"}, + {file = "duckdb-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:713ff0a1fb63a6d60f454acf67f31656549fb5d63f21ac68314e4f522daa1a89"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9c0ee450dfedfb52dd4957244e31820feef17228da31af6d052979450a80fd19"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ff79b2ea9994398b545c0d10601cd73565fbd09f8951b3d8003c7c5c0cebc7cb"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bdf1aa71b924ef651062e6b8ff9981ad85bec89598294af8a072062c5717340"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0265bbc8216be3ced7b377ba8847128a3fc0ef99798a3c4557c1b88e3a01c23"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d418a315a07707a693bd985274c0f8c4dd77015d9ef5d8d3da4cc1942fd82e0"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2828475a292e68c71855190b818aded6bce7328f79e38c04a0c75f8f1c0ceef0"}, + {file = "duckdb-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c3aaeaae2eba97035c65f31ffdb18202c951337bf2b3d53d77ce1da8ae2ecf51"}, + {file = "duckdb-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c51790aaaea97d8e4a58a114c371ed8d2c4e1ca7cbf29e3bdab6d8ccfc5afc1e"}, + {file = "duckdb-0.10.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8af1ae7cc77a12206b6c47ade191882cc8f49f750bb3e72bb86ac1d4fa89926a"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4f7e8e8dc0e376aeb280b83f2584d0e25ec38985c27d19f3107b2edc4f4a97"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae942a79fad913defa912b56483cd7827a4e7721f4ce4bc9025b746ecb3c89"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01b57802898091455ca2a32c1335aac1e398da77c99e8a96a1e5de09f6a0add9"}, + {file = "duckdb-0.10.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:52e1ad4a55fa153d320c367046b9500578192e01c6d04308ba8b540441736f2c"}, + {file = "duckdb-0.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:904c47d04095af745e989c853f0bfc0776913dfc40dfbd2da7afdbbb5f67fed0"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:184ae7ea5874f3b8fa51ab0f1519bdd088a0b78c32080ee272b1d137e2c8fd9c"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd33982ecc9bac727a032d6cedced9f19033cbad56647147408891eb51a6cb37"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:f59bf0949899105dd5f8864cb48139bfb78454a8c017b8258ba2b5e90acf7afc"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:395f3b18948001e35dceb48a4423d574e38656606d033eef375408b539e7b076"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b8eb2b803be7ee1df70435c33b03a4598cdaf676cd67ad782b288dcff65d781"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31b2ddd331801064326c8e3587a4db8a31d02aef11332c168f45b3bd92effb41"}, + {file = "duckdb-0.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c8b89e76a041424b8c2026c5dc1f74b53fbbc6c6f650d563259885ab2e7d093d"}, + {file = "duckdb-0.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:79084a82f16c0a54f6bfb7ded5600400c2daa90eb0d83337d81a56924eaee5d4"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:79799b3a270dcd9070f677ba510f1e66b112df3068425691bac97c5e278929c7"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8fc394bfe3434920cdbcfbdd0ac3ba40902faa1dbda088db0ba44003a45318a"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c116605551b4abf5786243a59bcef02bd69cc51837d0c57cafaa68cdc428aa0c"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3191170c3b0a43b0c12644800326f5afdea00d5a4621d59dbbd0c1059139e140"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fee69a50eb93c72dc77e7ab1fabe0c38d21a52c5da44a86aa217081e38f9f1bd"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5f449e87dacb16b0d145dbe65fa6fdb5a55b2b6911a46d74876e445dd395bac"}, + {file = "duckdb-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4487d0df221b17ea4177ad08131bc606b35f25cfadf890987833055b9d10cdf6"}, + {file = "duckdb-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:c099ae2ff8fe939fda62da81704f91e2f92ac45e48dc0e37c679c9d243d01e65"}, + {file = "duckdb-0.10.0.tar.gz", hash = "sha256:c02bcc128002aa79e3c9d89b9de25e062d1096a8793bc0d7932317b7977f6845"}, +] [[package]] name = "email-validator" version = "1.3.1" description = "A robust email address syntax and deliverability validation library." 
-category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "email_validator-1.3.1-py2.py3-none-any.whl", hash = "sha256:49a72f5fa6ed26be1c964f0567d931d10bf3fdeeacdf97bc26ef1cd2a44e0bda"}, + {file = "email_validator-1.3.1.tar.gz", hash = "sha256:d178c5c6fa6c6824e9b04f199cf23e79ac15756786573c190d2ad13089411ad2"}, +] [package.dependencies] dnspython = ">=1.15.0" @@ -1792,9 +2412,12 @@ idna = ">=2.0.0" name = "enlighten" version = "1.11.2" description = "Enlighten Progress Bar" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "enlighten-1.11.2-py2.py3-none-any.whl", hash = "sha256:98c9eb20e022b6a57f1c8d4f17e16760780b6881e6d658c40f52d21255ea45f3"}, + {file = "enlighten-1.11.2.tar.gz", hash = "sha256:9284861dee5a272e0e1a3758cd3f3b7180b1bd1754875da76876f2a7f46ccb61"}, +] [package.dependencies] blessed = ">=1.17.7" @@ -1804,17 +2427,23 @@ prefixed = ">=0.3.2" name = "et-xmlfile" version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" -category = "main" optional = true python-versions = ">=3.6" - -[[package]] +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + +[[package]] name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] [package.extras] test = ["pytest (>=6)"] @@ -1823,9 +2452,12 @@ test = ["pytest (>=6)"] name = "fastembed" version = "0.1.1" description = "Fast, light, accurate library built for retrieval embedding generation" -category = "main" optional = true python-versions = ">=3.8.0,<3.12" +files = [ + {file = "fastembed-0.1.1-py3-none-any.whl", hash = "sha256:131413ae52cd72f4c8cced7a675f8269dbfd1a852abade3c815e265114bcc05a"}, + {file = "fastembed-0.1.1.tar.gz", hash = "sha256:f7e524ee4f74bb8aad16be5b687d1f77f608d40e96e292c87881dc36baf8f4c7"}, +] [package.dependencies] onnx = ">=1.11,<2.0" @@ -1838,9 +2470,12 @@ tqdm = ">=4.65,<5.0" name = "filelock" version = "3.12.3" description = "A platform independent file lock." 
-category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, + {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, +] [package.dependencies] typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} @@ -1853,9 +2488,12 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pyt name = "flake8" version = "5.0.4" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.6.1" +files = [ + {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, + {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, +] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" @@ -1866,9 +2504,12 @@ pyflakes = ">=2.5.0,<2.6.0" name = "flake8-bugbear" version = "22.12.6" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, + {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, +] [package.dependencies] attrs = ">=19.2.0" @@ -1881,9 +2522,12 @@ dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "tox"] name = "flake8-builtins" version = "1.5.3" description = "Check for python builtins being used as variables or parameters." -category = "dev" optional = false python-versions = "*" +files = [ + {file = "flake8-builtins-1.5.3.tar.gz", hash = "sha256:09998853b2405e98e61d2ff3027c47033adbdc17f9fe44ca58443d876eb00f3b"}, + {file = "flake8_builtins-1.5.3-py2.py3-none-any.whl", hash = "sha256:7706babee43879320376861897e5d1468e396a40b8918ed7bccf70e5f90b8687"}, +] [package.dependencies] flake8 = "*" @@ -1895,9 +2539,12 @@ test = ["coverage", "coveralls", "mock", "pytest", "pytest-cov"] name = "flake8-encodings" version = "0.5.0.post1" description = "A Flake8 plugin to identify incorrect use of encodings." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, + {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, +] [package.dependencies] astatine = ">=0.3.1" @@ -1913,9 +2560,12 @@ classes = ["jedi (>=0.18.0)"] name = "flake8-helper" version = "0.2.1" description = "A helper library for Flake8 plugins." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, + {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, +] [package.dependencies] flake8 = ">=3.8.4" @@ -1924,9 +2574,12 @@ flake8 = ">=3.8.4" name = "flake8-tidy-imports" version = "4.10.0" description = "A flake8 plugin that helps you write tidier imports." 
-category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, + {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, +] [package.dependencies] flake8 = ">=3.8.0" @@ -1935,9 +2588,12 @@ flake8 = ">=3.8.0" name = "flask" version = "2.2.5" description = "A simple framework for building complex web applications." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"}, + {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"}, +] [package.dependencies] click = ">=8.0" @@ -1954,9 +2610,12 @@ dotenv = ["python-dotenv"] name = "flask-appbuilder" version = "4.3.6" description = "Simple and rapid application development framework, built on top of Flask. includes detailed security, auto CRUD generation for your models, google charts and much more." -category = "dev" optional = false python-versions = "~=3.7" +files = [ + {file = "Flask-AppBuilder-4.3.6.tar.gz", hash = "sha256:8ca9710fa7d2704747d195e11b487d45a571f40559d8399d9d5dfa42ea1f3c78"}, + {file = "Flask_AppBuilder-4.3.6-py3-none-any.whl", hash = "sha256:840480dfd43134bebf78f3c7dc909e324c2689d2d9f27aeb1880a8a25466bc8d"}, +] [package.dependencies] apispec = {version = ">=6.0.0,<7", extras = ["yaml"]} @@ -1990,9 +2649,12 @@ talisman = ["flask-talisman (>=1.0.0,<2.0)"] name = "flask-babel" version = "2.0.0" description = "Adds i18n/l10n support to Flask applications" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "Flask-Babel-2.0.0.tar.gz", hash = "sha256:f9faf45cdb2e1a32ea2ec14403587d4295108f35017a7821a2b1acb8cfd9257d"}, + {file = "Flask_Babel-2.0.0-py3-none-any.whl", hash = "sha256:e6820a052a8d344e178cdd36dd4bb8aea09b4bda3d5f9fa9f008df2c7f2f5468"}, +] [package.dependencies] Babel = ">=2.3" @@ -2007,9 +2669,12 @@ dev = ["Pallets-Sphinx-Themes", "bumpversion", "ghp-import", "pytest", "pytest-m name = "flask-caching" version = "2.0.2" description = "Adds caching support to Flask applications." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-Caching-2.0.2.tar.gz", hash = "sha256:24b60c552d59a9605cc1b6a42c56cdb39a82a28dab4532bbedb9222ae54ecb4e"}, + {file = "Flask_Caching-2.0.2-py3-none-any.whl", hash = "sha256:19571f2570e9b8dd9dd9d2f49d7cbee69c14ebe8cc001100b1eb98c379dd80ad"}, +] [package.dependencies] cachelib = ">=0.9.0,<0.10.0" @@ -2019,9 +2684,12 @@ Flask = "<3" name = "flask-jwt-extended" version = "4.5.2" description = "Extended JWT integration with Flask" -category = "dev" optional = false python-versions = ">=3.7,<4" +files = [ + {file = "Flask-JWT-Extended-4.5.2.tar.gz", hash = "sha256:ba56245ba43b71c8ae936784b867625dce8b9956faeedec2953222e57942fb0b"}, + {file = "Flask_JWT_Extended-4.5.2-py2.py3-none-any.whl", hash = "sha256:e0ef23d8c863746bd141046167073699e1a7b03c97169cbba70f05b8d9cd6b9e"}, +] [package.dependencies] Flask = ">=2.0,<3.0" @@ -2035,9 +2703,12 @@ asymmetric-crypto = ["cryptography (>=3.3.1)"] name = "flask-limiter" version = "3.5.0" description = "Rate limiting for flask applications" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-Limiter-3.5.0.tar.gz", hash = "sha256:13a3491b994c49f7cb4706587a38ca47e8162b576530472df38be68104f299c0"}, + {file = "Flask_Limiter-3.5.0-py3-none-any.whl", hash = "sha256:dbda4174f44e6cb858c6eb75e7488186f2977dd5d33d7028ba1aabf179de1bee"}, +] [package.dependencies] Flask = ">=2" @@ -2055,9 +2726,12 @@ redis = ["limits[redis]"] name = "flask-login" version = "0.6.2" description = "User authentication and session management for Flask." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-Login-0.6.2.tar.gz", hash = "sha256:c0a7baa9fdc448cdd3dd6f0939df72eec5177b2f7abe6cb82fc934d29caac9c3"}, + {file = "Flask_Login-0.6.2-py3-none-any.whl", hash = "sha256:1ef79843f5eddd0f143c2cd994c1b05ac83c0401dc6234c143495af9a939613f"}, +] [package.dependencies] Flask = ">=1.0.4" @@ -2067,9 +2741,12 @@ Werkzeug = ">=1.0.1" name = "flask-session" version = "0.5.0" description = "Server-side session support for Flask" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-Session-0.5.0.tar.gz", hash = "sha256:190875e6aebf2953c6803d42379ef3b934bc209ef8ef006f97aecb08f5aaeb86"}, + {file = "flask_session-0.5.0-py3-none-any.whl", hash = "sha256:1619bcbc16f04f64e90f8e0b17145ba5c9700090bb1294e889956c1282d58631"}, +] [package.dependencies] cachelib = "*" @@ -2079,9 +2756,12 @@ flask = ">=2.2" name = "flask-sqlalchemy" version = "2.5.1" description = "Adds SQLAlchemy support to your Flask application." -category = "dev" optional = false python-versions = ">= 2.7, != 3.0.*, != 3.1.*, != 3.2.*, != 3.3.*" +files = [ + {file = "Flask-SQLAlchemy-2.5.1.tar.gz", hash = "sha256:2bda44b43e7cacb15d4e05ff3cc1f8bc97936cc464623424102bfc2c35e95912"}, + {file = "Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl", hash = "sha256:f12c3d4cc5cc7fdcc148b9527ea05671718c3ea45d50c7e732cceb33f574b390"}, +] [package.dependencies] Flask = ">=0.10" @@ -2091,9 +2771,12 @@ SQLAlchemy = ">=0.8.0" name = "flask-wtf" version = "1.1.1" description = "Form rendering, validation, and CSRF protection for Flask with WTForms." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Flask-WTF-1.1.1.tar.gz", hash = "sha256:41c4244e9ae626d63bed42ae4785b90667b885b1535d5a4095e1f63060d12aa9"}, + {file = "Flask_WTF-1.1.1-py3-none-any.whl", hash = "sha256:7887d6f1ebb3e17bf648647422f0944c9a469d0fcf63e3b66fb9a83037e38b2c"}, +] [package.dependencies] Flask = "*" @@ -2107,25 +2790,93 @@ email = ["email-validator"] name = "flatbuffers" version = "23.5.26" description = "The FlatBuffers serialization format for Python" -category = "main" optional = true python-versions = "*" +files = [ + {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, + {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, +] [[package]] name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = 
"frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, +] [[package]] name = "fsspec" version = "2023.6.0" description = "File-system specification" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, + {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, +] [package.extras] abfs = ["adlfs"] @@ -2155,17 +2906,22 @@ tqdm = ["tqdm"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" -category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, +] [[package]] name = "gcsfs" version = "2023.6.0" description = "Convenient Filesystem interface over GCS" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "gcsfs-2023.6.0-py2.py3-none-any.whl", hash = "sha256:3b3c7d8eddd4ec1380f3b49fbb861ee1e974adb223564401f10884b6260d406f"}, + {file = "gcsfs-2023.6.0.tar.gz", hash = "sha256:30b14fccadb3b7f0d99b2cd03bd8507c40f3a9a7d05847edca571f642bedbdff"}, +] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" @@ -2184,9 +2940,12 @@ gcsfuse = ["fusepy"] name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, + {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, +] [package.dependencies] smmap = ">=3.0.1,<6" @@ -2195,9 +2954,12 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.34" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.34-py3-none-any.whl", hash = "sha256:5d3802b98a3bae1c2b8ae0e1ff2e4aa16bcdf02c145da34d092324f599f01395"}, + {file = "GitPython-3.1.34.tar.gz", hash = "sha256:85f7d365d1f6bf677ae51039c1ef67ca59091c7ebd5a3509aa399d4eda02d6dd"}, +] [package.dependencies] gitdb = ">=4.0.1,<5" @@ -2206,28 +2968,34 @@ gitdb = ">=4.0.1,<5" name = "giturlparse" version = "0.11.1" description = "A Git URL parsing module (supports parsing and rewriting)" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "giturlparse-0.11.1-py2.py3-none-any.whl", hash = "sha256:6422f25c8ca563e1a3cb6b85862e48614be804cd1334e6d84be5630eb26b343f"}, + {file = "giturlparse-0.11.1.tar.gz", hash = "sha256:cdbe0c062096c69e00f08397826dddebc1f73bc15b793994579c13aafc70c990"}, +] [[package]] name = "google-api-core" version = "2.11.1" description = "Google API client core library" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "google-api-core-2.11.1.tar.gz", hash = "sha256:25d29e05a0058ed5f19c61c0a78b1b53adea4d9364b464d014fbda941f6d1c9a"}, + {file = "google_api_core-2.11.1-py3-none-any.whl", hash = "sha256:d92a5a92dc36dd4f4b9ee4e55528a90e432b059f93aee6ad857f9de8cc7ae94a"}, +] [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = 
">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, - {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -2241,12 +3009,15 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] name = "google-api-python-client" version = "2.97.0" description = "Google API Client Library for Python" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "google-api-python-client-2.97.0.tar.gz", hash = "sha256:48277291894876a1ca7ed4127e055e81f81e6343ced1b544a7200ae2c119dcd7"}, + {file = "google_api_python_client-2.97.0-py2.py3-none-any.whl", hash = "sha256:5215f4cd577753fc4192ccfbe0bb8b55d4bb5fd68fa6268ac5cf271b6305de31"}, +] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0.dev0" +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" google-auth = ">=1.19.0,<3.0.0.dev0" google-auth-httplib2 = ">=0.1.0" httplib2 = ">=0.15.0,<1.dev0" @@ -2256,9 +3027,12 @@ uritemplate = ">=3.0.1,<5" name = "google-auth" version = "2.22.0" description = "Google Authentication Library" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, + {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, +] [package.dependencies] cachetools = ">=2.0.0,<6.0" @@ -2278,9 +3052,12 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] name = "google-auth-httplib2" version = "0.1.0" description = "Google Authentication Library: httplib2 transport" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, + {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, +] [package.dependencies] google-auth = "*" @@ -2291,9 +3068,12 @@ six = "*" name = "google-auth-oauthlib" version = "1.0.0" description = "Google Authentication Library" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, + {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, +] [package.dependencies] google-auth = ">=2.15.0" @@ -2306,12 +3086,15 @@ tool = ["click (>=6.0.0)"] name = 
"google-cloud-bigquery" version = "3.11.4" description = "Google BigQuery API client library" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, + {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, +] [package.dependencies] -google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" grpcio = [ @@ -2338,12 +3121,15 @@ tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] name = "google-cloud-core" version = "2.3.3" description = "Google Cloud API client core library" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, + {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, +] [package.dependencies] -google-api-core = ">=1.31.6,<2.0.0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" [package.extras] @@ -2353,12 +3139,15 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"] name = "google-cloud-dataproc" version = "5.4.3" description = "Google Cloud Dataproc API client library" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "google-cloud-dataproc-5.4.3.tar.gz", hash = "sha256:d9c77c52aa5ddf52ae657736dbfb5312402933f72bab8480fc2d2afe98697402"}, + {file = "google_cloud_dataproc-5.4.3-py2.py3-none-any.whl", hash = "sha256:9cfff56cb53621cdffd0a3d6b10701e886e0a8ad54891e6c223eb67c0ff753ad"}, +] [package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, @@ -2370,12 +3159,15 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4 name = "google-cloud-storage" version = "2.10.0" description = "Google Cloud Storage API client library" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, + {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, +] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" google-resumable-media = ">=2.3.2" @@ -2388,9 +3180,78 @@ protobuf = ["protobuf (<5.0.0dev)"] name = "google-crc32c" version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, + 
{file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, +] [package.extras] testing = ["pytest"] @@ -2399,32 
+3260,246 @@ testing = ["pytest"] name = "google-re2" version = "1.1" description = "RE2 Python bindings" -category = "dev" optional = false python-versions = "~=3.8" - -[[package]] -name = "google-resumable-media" -version = "2.5.0" -description = "Utilities for Google Media Downloads and Resumable Uploads" -category = "main" -optional = true -python-versions = ">= 3.7" - -[package.dependencies] -google-crc32c = ">=1.0,<2.0dev" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] -requests = ["requests (>=2.18.0,<3.0.0dev)"] - -[[package]] +files = [ + {file = "google-re2-1.1.tar.gz", hash = "sha256:d3a9467ee52b46ac77ca928f6d0cbeaccfd92f03ca0f0f65b9df6a95184f3a1c"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:874d2e36dfa506b03d4f9c4aef1701a65304f4004c96c7edac7d8aea08fe193e"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b66eb84850afdce09aabca40bcd6f2a0e96178a1b4990d555678edb1f59bf255"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c461640a07db26dc2b51f43de607b7520e7debaf4f6a000f796a3c0196ca52af"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:7f9ba69eaee6e7a9f5ddfb919bf1a866af14a18b26a179e3fb1a6fe3d0cbf349"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f95cf16739cc3ea63728366881221b119f2322b4b739b7da6522d45a68792cea"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9fb56a41250191298e6a2859b0fdea1e83330c9870fe8d84e5836c506ae46e96"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb22ea995564d87baf4a4bfbb3ca024be913683a710f4f0dc9c94dc663afab20"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19b3f0bfbb2a2ca58ed0aaa9356d07a5c0921383a6dbeca086b2b74472f5ee08"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34fd7f97b84af7453cf05b25adfe2491ba3cef1ca548ac2907efa63d3510954d"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e029664192d8d30f7c977706183ef483e82ca239302272df74e01d2e22897ca"}, + {file = "google_re2-1.1-1-cp310-cp310-win32.whl", hash = "sha256:41a8f222f9839d059d37efd28e4deec203502d7e39c3759d83d6a33deadf1d2e"}, + {file = "google_re2-1.1-1-cp310-cp310-win_amd64.whl", hash = "sha256:6141d569fdf72aa693f040ba05c469036587395af07ff419b9a3c009d6ffefd3"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2d03f6aaf22788ba13a770f0d183b8eebe55545bcbb6e4c41dcccac7ded014d"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a98f15fd9c31bf80d368698447191a2e9703880b305dbf34d9a63ce634b8a557"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:42128916cc2966623832aabbd224c88e862d1c531d6bc49ab141f565e6321a90"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6e27986a166903ad7000635f6faed8ab5072d687f822ac9f692c40b2470aebcf"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:5e9edcd743a830d0c0b2729201e42ab86fceef8f4086df65563f482e4544359e"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:d33145bbfd32e916f1c911cd9225be5364a36c3959742a0cc4dfc0692d6a2a5e"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:8b27cc2544b69a357ab2a749dc0c13a1b9055198c56f4c2c3b0f61d693f8e203"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cdf8982b6def987e95b37984d0c1c878de32635dd78acde3273f730b69708c9"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71ac661a7365e134741fe5542f13d7ce1e6187446b96ddee4c8b7d153fc8f05a"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35a902ba31a71a3e9e114e44473624d9aa9f9b85ec981bfa91671aefe0ef1a6c"}, + {file = "google_re2-1.1-1-cp311-cp311-win32.whl", hash = "sha256:9469f26b485da2784c658e687a766c72e1a17b1e63b3ed24b5f64c3d19fbae3d"}, + {file = "google_re2-1.1-1-cp311-cp311-win_amd64.whl", hash = "sha256:07dd0780240ee431781119b46c3bbf76f5cef24a2cbb542f6a08c643e0a68d98"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9857dc4d69b8025057c8129e98406a24d51bdaf1b96e481dbba7e69e0ec85104"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:a6eaaa5f200022eb0bdded5949c91454fc96e1edd6f9e9a96dd1dc32c821c00e"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a32bb2afe128d90b8edc20d4f7d297f7e2753206eba92937a57e5280736eac74"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4f2754616c61b76ab4e5a4f39892a52a00897203b859c5abd7e3c630dd883cda"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:b110f3d657e8f67a43a699d327ce47095b80180ea1118e2de44cb5c7002503d9"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:fd62ba2853eef65e249a9c4437a9ecac568222062bc956f0c61a3d1151a6271b"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23b50eb74dc3e1d480b04b987c61242df5dade50d08bc16e25eb3582b83fca80"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1bde89855dd5ab0811187d21eec149975510c80e865c771c883524a452445e7"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10c6cddc720151a509beb98ab310fa0cc8bcb265f83518ebf831de2c9ff73af0"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bea09c5e8401ec50b8f211bc820ec2f0ca5e744ac67431a1b39bdacbd266553"}, + {file = "google_re2-1.1-1-cp38-cp38-win32.whl", hash = "sha256:ffa51b118037518bcdf63c7649d0b4be7071982b83f48ee3bbabf24a9cb48f8a"}, + {file = "google_re2-1.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:3b47715b6d43c9351957eb5092ad0fa625d04106d81f34cb8a726c53395ad474"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:998f31bf7efbc9bb603d0c356c1c77e5331f689c71783df8e21e67bb025fc66a"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0b5f0eaab859d3ba5f462c82bf37ab56e9d37e19b40b5898c731dbe4213a85f7"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f6d591d9c4cbc7142b729ddcc3f654d059d8ebc3bc95891198808a4785a6b4d8"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:3c325c2eae197b423330a04ab62e2e1cf942676cd5560907db4d63e23ce0648a"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:1e019e8f57955806ee843254ce454249b58800a6e872b2c8e9df2ef3459de0d5"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_x86_64.whl", hash = 
"sha256:58ebbcc7ad2b639768a6bca586357291660ea40dfac83039208e5055c357513b"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:723f8553e7fc022294071f14fb7dfc7958c365dc7d4a71d4938ccd2df8c6eca4"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d81512b08e6787fc8ef29fea365d3fdbf957553a625550e1d96c36877ae30355"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c58601b155651cc572a23ee2860788c77581aad85d3567a55b89b0674702f34d"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c6c9f64b9724ec38da8e514f404ac64e9a6a5e8b1d7031c2dadd05c1f4c16fd"}, + {file = "google_re2-1.1-1-cp39-cp39-win32.whl", hash = "sha256:d1b751b9ab9f8e2ab2a36d72b909281ce65f328c9115a1685acae1a2d1afd7a4"}, + {file = "google_re2-1.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:ac775c75cec7069351d201da4e0fb0cae4c1c5ebecd08fa34e1be89740c1d80b"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5eaefe4705b75ca5f78178a50104b689e9282f868e12f119b26b4cffc0c7ee6e"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:e35f2c8aabfaaa4ce6420b3cae86c0c29042b1b4f9937254347e9b985694a171"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:35fd189cbaaaa39c9a6a8a00164c8d9c709bacd0c231c694936879609beff516"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:60475d222cebd066c80414831c8a42aa2449aab252084102ee05440896586e6a"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:871cb85b9b0e1784c983b5c148156b3c5314cb29ca70432dff0d163c5c08d7e5"}, + {file = "google_re2-1.1-2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:94f4e66e34bdb8de91ec6cdf20ba4fa9fea1dfdcfb77ff1f59700d01a0243664"}, + {file = "google_re2-1.1-2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1563577e2b720d267c4cffacc0f6a2b5c8480ea966ebdb1844fbea6602c7496f"}, + {file = "google_re2-1.1-2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:49b7964532a801b96062d78c0222d155873968f823a546a3dbe63d73f25bb56f"}, + {file = "google_re2-1.1-2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2362fd70eb639a75fd0187d28b4ba7b20b3088833d8ad7ffd8693d0ba159e1c2"}, + {file = "google_re2-1.1-2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86b80719636a4e21391e20a9adf18173ee6ae2ec956726fe2ff587417b5e8ba6"}, + {file = "google_re2-1.1-2-cp310-cp310-win32.whl", hash = "sha256:5456fba09df951fe8d1714474ed1ecda102a68ddffab0113e6c117d2e64e6f2b"}, + {file = "google_re2-1.1-2-cp310-cp310-win_amd64.whl", hash = "sha256:2ac6936a3a60d8d9de9563e90227b3aea27068f597274ca192c999a12d8baa8f"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5a87b436028ec9b0f02fe19d4cbc19ef30441085cdfcdf1cce8fbe5c4bd5e9a"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:fc0d4163de9ed2155a77e7a2d59d94c348a6bbab3cff88922fab9e0d3d24faec"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:48b12d953bc796736e7831d67b36892fb6419a4cc44cb16521fe291e594bfe23"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:62c780c927cff98c1538439f0ff616f48a9b2e8837c676f53170d8ae5b9e83cb"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_13_0_arm64.whl", hash = 
"sha256:04b2aefd768aa4edeef8b273327806c9cb0b82e90ff52eacf5d11003ac7a0db2"}, + {file = "google_re2-1.1-2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:9c90175992346519ee7546d9af9a64541c05b6b70346b0ddc54a48aa0d3b6554"}, + {file = "google_re2-1.1-2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22ad9ad9d125249d6386a2e80efb9de7af8260b703b6be7fa0ab069c1cf56ced"}, + {file = "google_re2-1.1-2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f70971f6ffe5254e476e71d449089917f50ebf9cf60f9cec80975ab1693777e2"}, + {file = "google_re2-1.1-2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f267499529e64a4abed24c588f355ebe4700189d434d84a7367725f5a186e48d"}, + {file = "google_re2-1.1-2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b632eff5e4cd44545a9c0e52f2e1becd55831e25f4dd4e0d7ec8ee6ca50858c1"}, + {file = "google_re2-1.1-2-cp311-cp311-win32.whl", hash = "sha256:a42c733036e8f242ee4e5f0e27153ad4ca44ced9e4ce82f3972938ddee528db0"}, + {file = "google_re2-1.1-2-cp311-cp311-win_amd64.whl", hash = "sha256:64f8eed4ca96905d99b5286b3d14b5ca4f6a025ff3c1351626a7df2f93ad1ddd"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5541efcca5b5faf7e0d882334a04fa479bad4e7433f94870f46272eec0672c4a"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:92309af35b6eb2d3b3dc57045cdd83a76370958ab3e0edd2cc4638f6d23f5b32"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:197cd9bcaba96d18c5bf84d0c32fca7a26c234ea83b1d3083366f4392cb99f78"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:1b896f171d29b541256cf26e10dccc9103ac1894683914ed88828ca6facf8dca"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:e022d3239b945014e916ca7120fee659b246ec26c301f9e0542f1a19b38a8744"}, + {file = "google_re2-1.1-2-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:2c73f8a9440873b68bee1198094377501065e85aaf6fcc0d2512c7589ffa06ca"}, + {file = "google_re2-1.1-2-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:901d86555bd7725506d651afaba7d71cd4abd13260aed6cfd7c641a45f76d4f6"}, + {file = "google_re2-1.1-2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce4710ff636701cfb56eb91c19b775d53b03749a23b7d2a5071bbbf4342a9067"}, + {file = "google_re2-1.1-2-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76a20e5ebdf5bc5d430530197e42a2eeb562f729d3a3fb51f39168283d676e66"}, + {file = "google_re2-1.1-2-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:77c9f4d4bb1c8de9d2642d3c4b8b615858ba764df025b3b4f1310266f8def269"}, + {file = "google_re2-1.1-2-cp38-cp38-win32.whl", hash = "sha256:94bd60785bf37ef130a1613738e3c39465a67eae3f3be44bb918540d39b68da3"}, + {file = "google_re2-1.1-2-cp38-cp38-win_amd64.whl", hash = "sha256:59efeb77c0dcdbe37794c61f29c5b1f34bc06e8ec309a111ccdd29d380644d70"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:221e38c27e1dd9ccb8e911e9c7aed6439f68ce81e7bb74001076830b0d6e931d"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:d9145879e6c2e1b814445300b31f88a675e1f06c57564670d95a1442e8370c27"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:c8a12f0740e2a52826bdbf95569a4b0abdf413b4012fa71e94ad25dd4715c6e5"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_12_0_x86_64.whl", hash = 
"sha256:9c9998f71466f4db7bda752aa7c348b2881ff688e361108fe500caad1d8b9cb2"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:0c39f69b702005963a3d3bf78743e1733ad73efd7e6e8465d76e3009e4694ceb"}, + {file = "google_re2-1.1-2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:6d0ce762dee8d6617d0b1788a9653e805e83a23046c441d0ea65f1e27bf84114"}, + {file = "google_re2-1.1-2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ecf3619d98c9b4a7844ab52552ad32597cdbc9a5bdbc7e3435391c653600d1e2"}, + {file = "google_re2-1.1-2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a1426a8cbd1fa004974574708d496005bd379310c4b1c7012be4bc75efde7a8"}, + {file = "google_re2-1.1-2-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1a30626ba48b4070f3eab272d860ef1952e710b088792c4d68dddb155be6bfc"}, + {file = "google_re2-1.1-2-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b9c1ffcfbc3095b6ff601ec2d2bf662988f6ea6763bc1c9d52bec55881f8fde"}, + {file = "google_re2-1.1-2-cp39-cp39-win32.whl", hash = "sha256:32ecf995a252c0548404c1065ba4b36f1e524f1f4a86b6367a1a6c3da3801e30"}, + {file = "google_re2-1.1-2-cp39-cp39-win_amd64.whl", hash = "sha256:e7865410f3b112a3609739283ec3f4f6f25aae827ff59c6bfdf806fd394d753e"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3b21f83f0a201009c56f06fcc7294a33555ede97130e8a91b3f4cae01aed1d73"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b38194b91354a38db1f86f25d09cdc6ac85d63aee4c67b43da3048ce637adf45"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e7da3da8d6b5a18d6c3b61b11cc5b66b8564eaedce99d2312b15b6487730fc76"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:aeca656fb10d8638f245331aabab59c9e7e051ca974b366dd79e6a9efb12e401"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:2069d6dc94f5fa14a159bf99cad2f11e9c0f8ec3b7f44a4dde9e59afe5d1c786"}, + {file = "google_re2-1.1-3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:2319a39305a4931cb5251451f2582713418a19bef2af7adf9e2a7a0edd939b99"}, + {file = "google_re2-1.1-3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb98fc131699756c6d86246f670a5e1c1cc1ba85413c425ad344cb30479b246c"}, + {file = "google_re2-1.1-3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6e038986d8ffe4e269f8532f03009f229d1f6018d4ac0dabc8aff876338f6e0"}, + {file = "google_re2-1.1-3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8618343ee658310e0f53bf586fab7409de43ce82bf8d9f7eb119536adc9783fd"}, + {file = "google_re2-1.1-3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d8140ca861cfe00602319cefe2c7b8737b379eb07fb328b51dc44584f47a2718"}, + {file = "google_re2-1.1-3-cp310-cp310-win32.whl", hash = "sha256:41f439c5c54e8a3a0a1fa2dbd1e809d3f643f862df7b16dd790f36a1238a272e"}, + {file = "google_re2-1.1-3-cp310-cp310-win_amd64.whl", hash = "sha256:fe20e97a33176d96d3e4b5b401de35182b9505823abea51425ec011f53ef5e56"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c39ff52b1765db039f690ee5b7b23919d8535aae94db7996079fbde0098c4d7"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:5420be674fd164041639ba4c825450f3d4bd635572acdde16b3dcd697f8aa3ef"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:ff53881cf1ce040f102a42d39db93c3f835f522337ae9c79839a842f26d97733"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:8d04600b0b53523118df2e413a71417c408f20dee640bf07dfab601c96a18a77"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:c4835d4849faa34a7fa1074098d81c420ed6c0707a3772482b02ce14f2a7c007"}, + {file = "google_re2-1.1-3-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:3309a9b81251d35fee15974d0ae0581a9a375266deeafdc3a3ac0d172a742357"}, + {file = "google_re2-1.1-3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2b51cafee7e0bc72d0a4a454547bd8f257cde412ac9f1a2dc46a203b5e42cf4"}, + {file = "google_re2-1.1-3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:83f5f1cb52f832c2297d271ee8c56cf5e9053448162e5d2223d513f729bad908"}, + {file = "google_re2-1.1-3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55865a1ace92be3f7953b2e2b38b901d8074a367aa491daee43260a53a7fc6f0"}, + {file = "google_re2-1.1-3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cec2167dd142e583e98c783bd0d28b8cf5a9cdbe1f7407ba4163fe3ccb613cb9"}, + {file = "google_re2-1.1-3-cp311-cp311-win32.whl", hash = "sha256:a0bc1fe96849e4eb8b726d0bba493f5b989372243b32fe20729cace02e5a214d"}, + {file = "google_re2-1.1-3-cp311-cp311-win_amd64.whl", hash = "sha256:e6310a156db96fc5957cb007dd2feb18476898654530683897469447df73a7cd"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e63cd10ea006088b320e8c5d308da1f6c87aa95138a71c60dd7ca1c8e91927e"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:12b566830a334178733a85e416b1e0507dbc0ceb322827616fe51ef56c5154f1"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:442e18c9d46b225c1496919c16eafe8f8d9bb4091b00b4d3440da03c55bbf4ed"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:c54c00263a9c39b2dacd93e9636319af51e3cf885c080b9680a9631708326460"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:15a3caeeb327bc22e0c9f95eb76890fec8874cacccd2b01ff5c080ab4819bbec"}, + {file = "google_re2-1.1-3-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:59ec0d2cced77f715d41f6eafd901f6b15c11e28ba25fe0effdc1de554d78e75"}, + {file = "google_re2-1.1-3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:185bf0e3441aed3840590f8e42f916e2920d235eb14df2cbc2049526803d3e71"}, + {file = "google_re2-1.1-3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:586d3f2014eea5be14d8de53374d9b79fa99689160e00efa64b5fe93af326087"}, + {file = "google_re2-1.1-3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc2575082de4ffd234d9607f3ae67ca22b15a1a88793240e2045f3b3a36a5795"}, + {file = "google_re2-1.1-3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:59c5ad438eddb3630def394456091284d7bbc5b89351987f94f3792d296d1f96"}, + {file = "google_re2-1.1-3-cp312-cp312-win32.whl", hash = "sha256:5b9878c53f2bf16f75bf71d4ddd57f6611351408d5821040e91c53ebdf82c373"}, + {file = "google_re2-1.1-3-cp312-cp312-win_amd64.whl", hash = "sha256:4fdecfeb213110d0a85bad335a8e7cdb59fea7de81a4fe659233f487171980f9"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2dd87bacab32b709c28d0145fe75a956b6a39e28f0726d867375dba5721c76c1"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_11_0_x86_64.whl", hash = 
"sha256:55d24c61fe35dddc1bb484593a57c9f60f9e66d7f31f091ef9608ed0b6dde79f"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a0cf1180d908622df648c26b0cd09281f92129805ccc56a39227fdbfeab95cb4"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:09586f07f3f88d432265c75976da1c619ab7192cd7ebdf53f4ae0776c19e4b56"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:539f1b053402203576e919a06749198da4ae415931ee28948a1898131ae932ce"}, + {file = "google_re2-1.1-3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:abf0bcb5365b0e27a5a23f3da403dffdbbac2c0e3a3f1535a8b10cc121b5d5fb"}, + {file = "google_re2-1.1-3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:19c83e5bbed7958213eeac3aa71c506525ce54faf03e07d0b96cd0a764890511"}, + {file = "google_re2-1.1-3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3348e77330ff672dc44ec01894fa5d93c409a532b6d688feac55e714e9059920"}, + {file = "google_re2-1.1-3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:06b63edb57c5ce5a13eabfd71155e346b9477dc8906dec7c580d4f70c16a7e0d"}, + {file = "google_re2-1.1-3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12fe57ba2914092b83338d61d8def9ebd5a2bd0fd8679eceb5d4c2748105d5c0"}, + {file = "google_re2-1.1-3-cp38-cp38-win32.whl", hash = "sha256:80796e08d24e606e675019fe8de4eb5c94bb765be13c384f2695247d54a6df75"}, + {file = "google_re2-1.1-3-cp38-cp38-win_amd64.whl", hash = "sha256:3c2257dedfe7cc5deb6791e563af9e071a9d414dad89e37ac7ad22f91be171a9"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43a0cd77c87c894f28969ac622f94b2e6d1571261dfdd785026848a25cfdc9b9"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:1038990b77fd66f279bd66a0832b67435ea925e15bb59eafc7b60fdec812b616"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fb5dda6875d18dd45f0f24ebced6d1f7388867c8fb04a235d1deab7ea479ce38"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:bb1d164965c6d57a351b421d2f77c051403766a8b75aaa602324ee2451fff77f"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:a072ebfa495051d07ffecbf6ce21eb84793568d5c3c678c00ed8ff6b8066ab31"}, + {file = "google_re2-1.1-3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:4eb66c8398c8a510adc97978d944b3b29c91181237218841ea1a91dc39ec0e54"}, + {file = "google_re2-1.1-3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f7c8b57b1f559553248d1757b7fa5b2e0cc845666738d155dff1987c2618264e"}, + {file = "google_re2-1.1-3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9162f6aa4f25453c682eb176f21b8e2f40205be9f667e98a54b3e1ff10d6ee75"}, + {file = "google_re2-1.1-3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2d65ddf67fd7bf94705626871d463057d3d9a3538d41022f95b9d8f01df36e1"}, + {file = "google_re2-1.1-3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d140c7b9395b4d1e654127aa1c99bcc603ed01000b7bc7e28c52562f1894ec12"}, + {file = "google_re2-1.1-3-cp39-cp39-win32.whl", hash = "sha256:80c5fc200f64b2d903eeb07b8d6cefc620a872a0240c7caaa9aca05b20f5568f"}, + {file = "google_re2-1.1-3-cp39-cp39-win_amd64.whl", hash = "sha256:9eb6dbcee9b5dc4069bbc0634f2eb039ca524a14bed5868fdf6560aaafcbca06"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:0db114d7e1aa96dbcea452a40136d7d747d60cbb61394965774688ef59cccd4e"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:82133958e003a1344e5b7a791b9a9dd7560b5c8f96936dbe16f294604524a633"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:9e74fd441d1f3d917d3303e319f61b82cdbd96b9a5ba919377a6eef1504a1e2b"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:734a2e7a4541c57253b5ebee24f3f3366ba3658bcad01da25fb623c78723471a"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:d88d5eecbc908abe16132456fae13690d0508f3ac5777f320ef95cb6cab9a961"}, + {file = "google_re2-1.1-4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:b91db80b171ecec435a07977a227757dd487356701a32f556fa6fca5d0a40522"}, + {file = "google_re2-1.1-4-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b23129887a64bb9948af14c84705273ed1a40054e99433b4acccab4dcf6a226"}, + {file = "google_re2-1.1-4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5dc1a0cc7cd19261dcaf76763e2499305dbb7e51dc69555167cdb8af98782698"}, + {file = "google_re2-1.1-4-cp310-cp310-win32.whl", hash = "sha256:3b2ab1e2420b5dd9743a2d6bc61b64e5f708563702a75b6db86637837eaeaf2f"}, + {file = "google_re2-1.1-4-cp310-cp310-win_amd64.whl", hash = "sha256:92efca1a7ef83b6df012d432a1cbc71d10ff42200640c0f9a5ff5b343a48e633"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:854818fd4ce79787aca5ba459d6e5abe4ca9be2c684a5b06a7f1757452ca3708"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:4ceef51174b6f653b6659a8fdaa9c38960c5228b44b25be2a3bcd8566827554f"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:ee49087c3db7e6f5238105ab5299c09e9b77516fe8cfb0a37e5f1e813d76ecb8"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:dc2312854bdc01410acc5d935f1906a49cb1f28980341c20a68797ad89d8e178"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0dc0d2e42296fa84a3cb3e1bd667c6969389cd5cdf0786e6b1f911ae2d75375b"}, + {file = "google_re2-1.1-4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6bf04ced98453b035f84320f348f67578024f44d2997498def149054eb860ae8"}, + {file = "google_re2-1.1-4-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d6b6ef11dc4ab322fa66c2f3561925f2b5372a879c3ed764d20e939e2fd3e5f"}, + {file = "google_re2-1.1-4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0dcde6646fa9a97fd3692b3f6ae7daf7f3277d7500b6c253badeefa11db8956a"}, + {file = "google_re2-1.1-4-cp311-cp311-win32.whl", hash = "sha256:5f4f0229deb057348893574d5b0a96d055abebac6debf29d95b0c0e26524c9f6"}, + {file = "google_re2-1.1-4-cp311-cp311-win_amd64.whl", hash = "sha256:4713ddbe48a18875270b36a462b0eada5e84d6826f8df7edd328d8706b6f9d07"}, + {file = "google_re2-1.1-4-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:40a698300b8faddbb325662973f839489c89b960087060bd389c376828978a04"}, + {file = "google_re2-1.1-4-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:103d2d7ac92ba23911a151fd1fc7035cbf6dc92a7f6aea92270ebceb5cd5acd3"}, + {file = "google_re2-1.1-4-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:51fb7182bccab05e8258a2b6a63dda1a6b4a9e8dfb9b03ec50e50c49c2827dd4"}, + {file = "google_re2-1.1-4-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:65383022abd63d7b620221eba7935132b53244b8b463d8fdce498c93cf58b7b7"}, + {file = 
"google_re2-1.1-4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396281fc68a9337157b3ffcd9392c6b7fcb8aab43e5bdab496262a81d56a4ecc"}, + {file = "google_re2-1.1-4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8198adcfcff1c680e052044124621730fc48d08005f90a75487f5651f1ebfce2"}, + {file = "google_re2-1.1-4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:81f7bff07c448aec4db9ca453d2126ece8710dbd9278b8bb09642045d3402a96"}, + {file = "google_re2-1.1-4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7dacf730fd7d6ec71b11d6404b0b26e230814bfc8e9bb0d3f13bec9b5531f8d"}, + {file = "google_re2-1.1-4-cp312-cp312-win32.whl", hash = "sha256:8c764f62f4b1d89d1ef264853b6dd9fee14a89e9b86a81bc2157fe3531425eb4"}, + {file = "google_re2-1.1-4-cp312-cp312-win_amd64.whl", hash = "sha256:0be2666df4bc5381a5d693585f9bbfefb0bfd3c07530d7e403f181f5de47254a"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:5cb1b63a0bfd8dd65d39d2f3b2e5ae0a06ce4b2ce5818a1d1fc78a786a252673"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:e41751ce6b67a95230edd0772226dc94c2952a2909674cd69df9804ed0125307"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:b998cfa2d50bf4c063e777c999a7e8645ec7e5d7baf43ad71b1e2e10bb0300c3"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:226ca3b0c2e970f3fc82001ac89e845ecc7a4bb7c68583e7a76cda70b61251a7"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:9adec1f734ebad7c72e56c85f205a281d8fe9bf6583bc21020157d3f2812ce89"}, + {file = "google_re2-1.1-4-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:9c34f3c64ba566af967d29e11299560e6fdfacd8ca695120a7062b6ed993b179"}, + {file = "google_re2-1.1-4-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1b85385fe293838e0d0b6e19e6c48ba8c6f739ea92ce2e23b718afe7b343363"}, + {file = "google_re2-1.1-4-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4694daa8a8987cfb568847aa872f9990e930c91a68c892ead876411d4b9012c3"}, + {file = "google_re2-1.1-4-cp38-cp38-win32.whl", hash = "sha256:5e671e9be1668187e2995aac378de574fa40df70bb6f04657af4d30a79274ce0"}, + {file = "google_re2-1.1-4-cp38-cp38-win_amd64.whl", hash = "sha256:f66c164d6049a8299f6dfcfa52d1580576b4b9724d6fcdad2f36f8f5da9304b6"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:25cb17ae0993a48c70596f3a3ef5d659638106401cc8193f51c0d7961b3b3eb7"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:5f101f86d14ca94ca4dcf63cceaa73d351f2be2481fcaa29d9e68eeab0dc2a88"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:4e82591e85bf262a6d74cff152867e05fc97867c68ba81d6836ff8b0e7e62365"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:1f61c09b93ffd34b1e2557e5a9565039f935407a5786dbad46f64f1a484166e6"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:12b390ad8c7e74bab068732f774e75e0680dade6469b249a721f3432f90edfc3"}, + {file = "google_re2-1.1-4-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:1284343eb31c2e82ed2d8159f33ba6842238a56782c881b07845a6d85613b055"}, + {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c7b38e0daf2c06e4d3163f4c732ab3ad2521aecfed6605b69e4482c612da303"}, + {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, + {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, + {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, +] + +[[package]] +name = "google-resumable-media" +version = "2.5.0" +description = "Utilities for Google Media Downloads and Resumable Uploads" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, + {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, +] + +[package.dependencies] +google-crc32c = ">=1.0,<2.0dev" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] +requests = ["requests (>=2.18.0,<3.0.0dev)"] + +[[package]] name = "googleapis-common-protos" version = "1.60.0" description = "Common protobufs used in Google APIs" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "googleapis-common-protos-1.60.0.tar.gz", hash = "sha256:e73ebb404098db405ba95d1e1ae0aa91c3e15a71da031a2eeb6b2e23e7bc3708"}, + {file = "googleapis_common_protos-1.60.0-py2.py3-none-any.whl", hash = "sha256:69f9bbcc6acde92cab2db95ce30a70bd2b81d20b12eff3f1aabaffcbe8a93918"}, +] [package.dependencies] grpcio = {version = ">=1.44.0,<2.0.0.dev0", optional = true, markers = "extra == \"grpc\""} @@ -2437,9 +3512,11 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] name = "grapheme" version = "0.6.0" description = "Unicode grapheme helpers" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"}, +] [package.extras] test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"] @@ -2448,9 +3525,12 @@ test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"] name = "graphviz" version = "0.20.1" description = "Simple Python interface for Graphviz" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, + {file = "graphviz-0.20.1.zip", hash = "sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, +] [package.extras] dev = ["flake8", "pep8-naming", "tox (>=3)", "twine", "wheel"] @@ -2459,23 +3539,85 @@ test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>= [[package]] name = "greenlet" -version = "2.0.2" +version = "3.0.3" description = "Lightweight in-process concurrent programming" -category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" - -[package.extras] -docs = ["Sphinx", "docutils (<0.18)"] +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = 
"greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = 
"greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = 
"greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] [[package]] name = "grpc-google-iam-v1" version = "0.12.6" description = "IAM API client library" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "grpc-google-iam-v1-0.12.6.tar.gz", hash = "sha256:2bc4b8fdf22115a65d751c9317329322602c39b7c86a289c9b72d228d960ef5f"}, + {file = "grpc_google_iam_v1-0.12.6-py2.py3-none-any.whl", hash = "sha256:5c10f3d8dc2d88678ab1a9b0cb5482735c5efee71e6c0cd59f872eef22913f5c"}, +] [package.dependencies] googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} @@ -2486,9 +3628,55 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 name = "grpcio" version = "1.57.0" description = "HTTP/2-based RPC framework" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, + {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, + {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = 
"sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, + {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, + {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, + {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, + {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, + {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, + {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, + {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, + {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, + {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, + {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, + {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, + {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, + {file = 
"grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, + {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, + {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, + {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, + {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, + {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, + {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, + {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, + {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, +] [package.extras] protobuf = ["grpcio-tools (>=1.57.0)"] @@ -2497,9 +3685,12 @@ protobuf = ["grpcio-tools (>=1.57.0)"] name = "grpcio-status" version = "1.57.0" description = "Status proto mapping for gRPC" -category = "main" optional = true python-versions = ">=3.6" +files = [ + {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"}, + {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"}, +] [package.dependencies] googleapis-common-protos = ">=1.5.5" @@ -2510,9 +3701,55 @@ protobuf = ">=4.21.6" name = "grpcio-tools" version = "1.57.0" description = "Protobuf code generator for gRPC" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"}, + {file = 
"grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"}, + {file = 
"grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"}, +] [package.dependencies] grpcio = ">=1.57.0" @@ -2523,9 +3760,12 @@ setuptools = "*" name = "gunicorn" version = "21.2.0" description = "WSGI HTTP Server for UNIX" -category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] [package.dependencies] packaging = "*" @@ -2540,17 +3780,23 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = 
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] [[package]] name = "h2" version = "4.1.0" description = "HTTP/2 State-Machine based protocol implementation" -category = "main" optional = true python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] [package.dependencies] hpack = ">=4.0,<5" @@ -2560,9 +3806,12 @@ hyperframe = ">=6.0,<7" name = "hexbytes" version = "0.3.1" description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" -category = "main" optional = false python-versions = ">=3.7, <4" +files = [ + {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, + {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, +] [package.extras] dev = ["black (>=22)", "bumpversion (>=0.5.3)", "eth-utils (>=1.0.1,<3)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "hypothesis (>=3.44.24,<=6.31.6)", "ipython", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)", "pytest (>=7.0.0)", "pytest-watch (>=4.1.0)", "pytest-xdist (>=2.4.0)", "sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)", "tox (>=4.0.0)", "twine", "wheel"] @@ -2574,35 +3823,44 @@ test = ["eth-utils (>=1.0.1,<3)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>= name = "hpack" version = "4.0.0" description = "Pure-Python HPACK header compression" -category = "main" optional = true python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] [[package]] name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, + {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, +] [package.dependencies] anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = ">=1.0.0,<2.0.0" +sniffio = "==1.*" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] [[package]] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, + {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, +] [package.dependencies] pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} @@ -2611,9 +3869,12 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httpx" version = "0.24.1" description = "The next generation HTTP client." 
-category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "httpx-0.24.1-py3-none-any.whl", hash = "sha256:06781eb9ac53cde990577af654bd990a4949de37a28bdb4a230d434f3a30b9bd"}, + {file = "httpx-0.24.1.tar.gz", hash = "sha256:5853a43053df830c20f8110c5e69fe44d035d850b2dfe795e196f00fdb774bdd"}, +] [package.dependencies] certifi = "*" @@ -2624,17 +3885,20 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] [[package]] name = "humanfriendly" version = "10.0" description = "Human friendly output for text interfaces using Python" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] [package.dependencies] pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} @@ -2643,9 +3907,12 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve name = "humanize" version = "4.8.0" description = "Python humanize utilities" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, + {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, +] [package.extras] tests = ["freezegun", "pytest", "pytest-cov"] @@ -2654,25 +3921,34 @@ tests = ["freezegun", "pytest", "pytest-cov"] name = "hyperframe" version = "6.0.1" description = "HTTP/2 framing layer for Python" -category = "main" optional = true python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] [[package]] name = "importlib-metadata" version = "6.11.0" description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b"}, + {file = "importlib_metadata-6.11.0.tar.gz", hash = "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443"}, +] [package.dependencies] zipp = ">=0.5" @@ -2686,9 +3962,12 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs name = "importlib-resources" version = "6.0.1" description = "Read resources from Python packages" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = 
"importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, + {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, +] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} @@ -2701,25 +3980,34 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "inflection" version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" -category = "dev" optional = false python-versions = ">=3.5" +files = [ + {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, + {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, +] [[package]] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] [[package]] name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" -category = "main" optional = false python-versions = "*" +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] [package.dependencies] six = "*" @@ -2728,9 +4016,12 @@ six = "*" name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." -category = "dev" optional = false python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] [package.extras] colors = ["colorama (>=0.4.3)"] @@ -2742,17 +4033,23 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, + {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, +] [[package]] name = "jaraco-classes" version = "3.3.0" description = "Utility functions for Python class constructs" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "jaraco.classes-3.3.0-py3-none-any.whl", hash = "sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb"}, + {file = "jaraco.classes-3.3.0.tar.gz", hash = "sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621"}, +] [package.dependencies] more-itertools = "*" @@ -2765,9 +4062,12 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "jeepney" version = "0.8.0" description = "Low-level, pure Python DBus protocol wrapper." 
-category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, + {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, +] [package.extras] test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] @@ -2777,9 +4077,12 @@ trio = ["async_generator", "trio"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] [package.dependencies] MarkupSafe = ">=2.0" @@ -2791,9 +4094,12 @@ i18n = ["Babel (>=2.7)"] name = "jinxed" version = "1.2.0" description = "Jinxed Terminal Library" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "jinxed-1.2.0-py2.py3-none-any.whl", hash = "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b"}, + {file = "jinxed-1.2.0.tar.gz", hash = "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5"}, +] [package.dependencies] ansicon = {version = "*", markers = "platform_system == \"Windows\""} @@ -2802,17 +4108,24 @@ ansicon = {version = "*", markers = "platform_system == \"Windows\""} name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -category = "main" optional = true python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] [[package]] name = "jsonpath-ng" version = "1.5.3" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
-category = "main" optional = false python-versions = "*" +files = [ + {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, + {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, + {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, +] [package.dependencies] decorator = "*" @@ -2823,10 +4136,13 @@ six = "*" name = "jsonschema" version = "4.19.0" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = false python-versions = ">=3.8" - +files = [ + {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, + {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, +] + [package.dependencies] attrs = ">=22.2.0" importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} @@ -2843,9 +4159,12 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = "sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, + {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, +] [package.dependencies] importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} @@ -2855,9 +4174,12 @@ referencing = ">=0.28.0" name = "keyring" version = "24.2.0" description = "Store and access your passwords safely." -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "keyring-24.2.0-py3-none-any.whl", hash = "sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6"}, + {file = "keyring-24.2.0.tar.gz", hash = "sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509"}, +] [package.dependencies] importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} @@ -2876,17 +4198,57 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "lazy-object-proxy" version = "1.9.0" description = "A fast and thorough lazy object proxy." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, + {file = 
"lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, +] [[package]] name = "leather" version = "0.3.4" description = "Python charting for 80% of humans." -category = "main" optional = false python-versions = "*" +files = [ + {file = "leather-0.3.4-py2.py3-none-any.whl", hash = "sha256:5e741daee96e9f1e9e06081b8c8a10c4ac199301a0564cdd99b09df15b4603d2"}, + {file = "leather-0.3.4.tar.gz", hash = "sha256:b43e21c8fa46b2679de8449f4d953c06418666dc058ce41055ee8a8d3bb40918"}, +] [package.dependencies] six = ">=1.6.1" @@ -2895,9 +4257,12 @@ six = ">=1.6.1" name = "limits" version = "3.6.0" description = "Rate limiting utilities" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "limits-3.6.0-py3-none-any.whl", hash = "sha256:32fe29a398352c71bc43d53773117d47e22c5ea4200aef28d3f5fdee10334cd7"}, + {file = "limits-3.6.0.tar.gz", hash = "sha256:57a9c69fd37ad1e4fa3886dff8d035227e1f6af87f47e9118627e72cf1ced3bf"}, +] [package.dependencies] deprecated = ">=1.2" @@ -2921,9 +4286,12 @@ rediscluster = ["redis (>=4.2.0,!=4.5.2,!=4.5.3)"] name = "linkify-it-py" version = "2.0.2" description = "Links recognition library with FULL unicode support." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "linkify-it-py-2.0.2.tar.gz", hash = "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2"}, + {file = "linkify_it_py-2.0.2-py3-none-any.whl", hash = "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"}, +] [package.dependencies] uc-micro-py = "*" @@ -2938,36 +4306,142 @@ test = ["coverage", "pytest", "pytest-cov"] name = "lockfile" version = "0.12.2" description = "Platform-independent file locking module" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"}, + {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"}, +] [[package]] name = "logbook" version = "1.5.3" description = "A logging replacement for Python" -category = "main" optional = false python-versions = "*" +files = [ + {file = "Logbook-1.5.3-cp27-cp27m-win32.whl", hash = "sha256:56ee54c11df3377314cedcd6507638f015b4b88c0238c2e01b5eb44fd3a6ad1b"}, + {file = "Logbook-1.5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:2dc85f1510533fddb481e97677bb7bca913560862734c0b3b289bfed04f78c92"}, + {file = "Logbook-1.5.3-cp35-cp35m-win32.whl", hash = "sha256:94e2e11ff3c2304b0d09a36c6208e5ae756eb948b210e5cbd63cd8d27f911542"}, + {file = "Logbook-1.5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:97fee1bd9605f76335b169430ed65e15e457a844b2121bd1d90a08cf7e30aba0"}, + {file = "Logbook-1.5.3-cp36-cp36m-win32.whl", hash = "sha256:7c533eb728b3d220b1b5414ba4635292d149d79f74f6973b4aa744c850ca944a"}, + {file = "Logbook-1.5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:e18f7422214b1cf0240c56f884fd9c9b4ff9d0da2eabca9abccba56df7222f66"}, + {file = "Logbook-1.5.3-cp37-cp37m-win32.whl", hash = "sha256:8f76a2e7b1f72595f753228732f81ce342caf03babc3fed6bbdcf366f2f20f18"}, + {file = "Logbook-1.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0cf2cdbfb65a03b5987d19109dacad13417809dcf697f66e1a7084fb21744ea9"}, + {file = "Logbook-1.5.3.tar.gz", hash = "sha256:66f454ada0f56eae43066f604a222b09893f98c1adc18df169710761b8f32fe8"}, +] [package.extras] -all = ["Jinja2", "brotli", "cython", "execnet (>=1.0.9)", "pytest (>4.0)", "pytest-cov (>=2.6)", "pyzmq", "redis", "sqlalchemy"] +all = ["Jinja2", "brotli", "cython", "execnet (>=1.0.9)", "mock", "pytest", "pytest-cov (<2.6)", "pyzmq", "redis", "sqlalchemy"] compression = ["brotli"] -dev = ["cython", "pytest (>4.0)", "pytest-cov (>=2.6)"] +dev = ["cython", "mock", "pytest", "pytest-cov (<2.6)"] execnet = ["execnet (>=1.0.9)"] jinja = ["Jinja2"] redis = ["redis"] sqlalchemy = ["sqlalchemy"] -test = ["pytest (>4.0)", "pytest-cov (>=2.6)"] +test = ["mock", "pytest", "pytest-cov (<2.6)"] zmq = ["pyzmq"] [[package]] name = "lxml" version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
-category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ + {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, + {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, + {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, + {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, + {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, + {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, + {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, + {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, + {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, + {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, + {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, + {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, + {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, + {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, + {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, + {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, + {file = 
"lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, + {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, + {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, + {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, + {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, + {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, + {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, + {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, + {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, + {file = 
"lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, +] [package.extras] cssselect = ["cssselect (>=0.7)"] @@ -2979,9 +4453,46 @@ source = ["Cython (>=0.29.35)"] name = "lz4" version = "4.3.3" description = "LZ4 Bindings for Python" -category = "main" optional = true python-versions = ">=3.8" +files = [ + {file = "lz4-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201"}, + {file = "lz4-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f76176492ff082657ada0d0f10c794b6da5800249ef1692b35cf49b1e93e8ef7"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d18718f9d78182c6b60f568c9a9cec8a7204d7cb6fad4e511a2ef279e4cb05"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cdc60e21ec70266947a48839b437d46025076eb4b12c76bd47f8e5eb8a75dcc"}, + {file = "lz4-4.3.3-cp310-cp310-win32.whl", hash = "sha256:c81703b12475da73a5d66618856d04b1307e43428a7e59d98cfe5a5d608a74c6"}, + {file = "lz4-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:43cf03059c0f941b772c8aeb42a0813d68d7081c009542301637e5782f8a33e2"}, + {file = "lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6"}, + {file = "lz4-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21"}, + {file = "lz4-4.3.3-cp311-cp311-win32.whl", hash = "sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d"}, + {file = "lz4-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c"}, + {file = "lz4-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d"}, + {file = "lz4-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca8fccc15e3add173da91be8f34121578dc777711ffd98d399be35487c934bf"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d84b479ddf39fe3ea05387f10b779155fc0990125f4fb35d636114e1c63a2e"}, + {file = "lz4-4.3.3-cp312-cp312-win32.whl", hash = "sha256:337cb94488a1b060ef1685187d6ad4ba8bc61d26d631d7ba909ee984ea736be1"}, + {file = "lz4-4.3.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:5d35533bf2cee56f38ced91f766cd0038b6abf46f438a80d50c52750088be93f"}, + {file = "lz4-4.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:363ab65bf31338eb364062a15f302fc0fab0a49426051429866d71c793c23394"}, + {file = "lz4-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0a136e44a16fc98b1abc404fbabf7f1fada2bdab6a7e970974fb81cf55b636d0"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abc197e4aca8b63f5ae200af03eb95fb4b5055a8f990079b5bdf042f568469dd"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56f4fe9c6327adb97406f27a66420b22ce02d71a5c365c48d6b656b4aaeb7775"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0e822cd7644995d9ba248cb4b67859701748a93e2ab7fc9bc18c599a52e4604"}, + {file = "lz4-4.3.3-cp38-cp38-win32.whl", hash = "sha256:24b3206de56b7a537eda3a8123c644a2b7bf111f0af53bc14bed90ce5562d1aa"}, + {file = "lz4-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:b47839b53956e2737229d70714f1d75f33e8ac26e52c267f0197b3189ca6de24"}, + {file = "lz4-4.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6756212507405f270b66b3ff7f564618de0606395c0fe10a7ae2ffcbbe0b1fba"}, + {file = "lz4-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee9ff50557a942d187ec85462bb0960207e7ec5b19b3b48949263993771c6205"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b901c7784caac9a1ded4555258207d9e9697e746cc8532129f150ffe1f6ba0d"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d9ec061b9eca86e4dcc003d93334b95d53909afd5a32c6e4f222157b50c071"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4c7bf687303ca47d69f9f0133274958fd672efaa33fb5bcde467862d6c621f0"}, + {file = "lz4-4.3.3-cp39-cp39-win32.whl", hash = "sha256:054b4631a355606e99a42396f5db4d22046a3397ffc3269a348ec41eaebd69d2"}, + {file = "lz4-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:eac9af361e0d98335a02ff12fb56caeb7ea1196cf1a49dbf6f17828a131da807"}, + {file = "lz4-4.3.3.tar.gz", hash = "sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e"}, +] [package.extras] docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] @@ -2992,17 +4503,23 @@ tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] name = "makefun" version = "1.15.1" description = "Small library to dynamically create python functions." -category = "main" optional = false python-versions = "*" +files = [ + {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, + {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, +] [[package]] name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, + {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, +] [package.dependencies] MarkupSafe = ">=0.9.2" @@ -3016,9 +4533,12 @@ testing = ["pytest"] name = "markdown" version = "3.4.4" description = "Python implementation of John Gruber's Markdown." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Markdown-3.4.4-py3-none-any.whl", hash = "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941"}, + {file = "Markdown-3.4.4.tar.gz", hash = "sha256:225c6123522495d4119a90b3a3ba31a1e87a70369e03f14799ea9c0d7183a3d6"}, +] [package.dependencies] importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} @@ -3031,9 +4551,12 @@ testing = ["coverage", "pyyaml"] name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] [package.dependencies] mdurl = ">=0.1,<1.0" @@ -3052,57 +4575,127 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" - -[[package]] -name = "marshmallow" -version = "3.20.1" -description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -packaging = ">=17.0" - -[package.extras] -dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] -docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] -lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] -tests = ["pytest", "pytz", "simplejson"] - -[[package]] -name = "marshmallow-oneofschema" -version = "3.0.1" -description = "marshmallow multiplexing schema" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -marshmallow = ">=3.0.0,<4.0.0" - -[package.extras] -dev = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "mock", "pre-commit (>=2.7,<3.0)", "pytest", "tox"] -lint = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "pre-commit (>=2.7,<3.0)"] -tests = ["mock", "pytest"] - -[[package]] -name = "marshmallow-sqlalchemy" -version = "0.26.1" -description = "SQLAlchemy integration with the marshmallow (de)serialization library" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -marshmallow = ">=3.0.0" -SQLAlchemy = ">=1.2.0" - -[package.extras] -dev = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "pre-commit (>=2.0,<3.0)", "pytest", "pytest-lazy-fixture", "tox"] +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = 
"MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", 
hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] + +[[package]] +name = "marshmallow" +version = "3.20.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, + {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "marshmallow-oneofschema" +version = "3.0.1" +description = "marshmallow multiplexing schema" +optional = false +python-versions = ">=3.6" +files = [ + {file = "marshmallow-oneofschema-3.0.1.tar.gz", hash = "sha256:62cd2099b29188c92493c2940ee79d1bf2f2619a71721664e5a98ec2faa58237"}, + {file = "marshmallow_oneofschema-3.0.1-py2.py3-none-any.whl", hash = "sha256:bd29410a9f2f7457a2b428286e2a80ef76b8ddc3701527dc1f935a88914b02f2"}, +] + +[package.dependencies] +marshmallow = ">=3.0.0,<4.0.0" + +[package.extras] +dev = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "mock", "pre-commit (>=2.7,<3.0)", "pytest", "tox"] +lint = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "pre-commit (>=2.7,<3.0)"] +tests = ["mock", "pytest"] + +[[package]] +name = "marshmallow-sqlalchemy" +version = "0.26.1" +description = "SQLAlchemy integration with the marshmallow (de)serialization library" +optional = false +python-versions = ">=3.6" +files = [ + {file = 
"marshmallow-sqlalchemy-0.26.1.tar.gz", hash = "sha256:d8525f74de51554b5c8491effe036f60629a426229befa33ff614c8569a16a73"}, + {file = "marshmallow_sqlalchemy-0.26.1-py2.py3-none-any.whl", hash = "sha256:ba7493eeb8669a3bf00d8f906b657feaa87a740ae9e4ecf829cfd6ddf763d276"}, +] + +[package.dependencies] +marshmallow = ">=3.0.0" +SQLAlchemy = ">=1.2.0" + +[package.extras] +dev = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "pre-commit (>=2.0,<3.0)", "pytest", "pytest-lazy-fixture", "tox"] docs = ["alabaster (==0.7.12)", "sphinx (==4.0.2)", "sphinx-issues (==1.2.0)"] lint = ["flake8 (==3.9.2)", "flake8-bugbear (==21.4.3)", "pre-commit (>=2.0,<3.0)"] tests = ["pytest", "pytest-lazy-fixture"] @@ -3111,9 +4704,12 @@ tests = ["pytest", "pytest-lazy-fixture"] name = "mashumaro" version = "3.11" description = "Fast and well tested serialization library" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "mashumaro-3.11-py3-none-any.whl", hash = "sha256:8f858bdb33790db6d9f3087dce793a26d109aeae38bed3ca9c2d7f16f19db412"}, + {file = "mashumaro-3.11.tar.gz", hash = "sha256:b0b2443be4bdad29bb209d91fe4a2a918fbd7b63cccfeb457c7eeb567db02f5e"}, +] [package.dependencies] msgpack = {version = ">=0.5.6", optional = true, markers = "extra == \"msgpack\""} @@ -3129,17 +4725,23 @@ yaml = ["pyyaml (>=3.13)"] name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] [[package]] name = "mdit-py-plugins" version = "0.4.0" description = "Collection of plugins for markdown-it-py" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, + {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, +] [package.dependencies] markdown-it-py = ">=1.0.0,<4.0.0" @@ -3153,17 +4755,22 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] [[package]] name = "minimal-snowplow-tracker" version = "0.0.2" description = "A minimal snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games" -category = "main" optional = false python-versions = "*" +files = [ + {file = "minimal-snowplow-tracker-0.0.2.tar.gz", hash = "sha256:acabf7572db0e7f5cbf6983d495eef54081f71be392330eb3aadb9ccb39daaa4"}, +] [package.dependencies] requests = ">=2.2.1,<3.0" @@ -3173,9 +4780,74 @@ six = ">=1.9.0,<2.0" name = "mmh3" version = "4.0.1" description = "Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions." 
-category = "main" optional = true python-versions = "*" +files = [ + {file = "mmh3-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b719ba87232749095011d567a36a25e40ed029fc61c47e74a12416d8bb60b311"}, + {file = "mmh3-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f0ad423711c5096cf4a346011f3b3ec763208e4f4cc4b10ed41cad2a03dbfaed"}, + {file = "mmh3-4.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80918e3f8ab6b717af0a388c14ffac5a89c15d827ff008c1ef545b8b32724116"}, + {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8222cd5f147defa1355b4042d590c34cef9b2bb173a159fcb72cda204061a4ac"}, + {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3821bcd1961ef19247c78c5d01b5a759de82ab0c023e2ff1d5ceed74322fa018"}, + {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59f7ed28c24249a54665f1ed3f6c7c1c56618473381080f79bcc0bd1d1db2e4a"}, + {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dacd8d07d4b9be8f0cb6e8fd9a08fc237c18578cf8d42370ee8af2f5a2bf1967"}, + {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd00883ef6bcf7831026ce42e773a4b2a4f3a7bf9003a4e781fecb1144b06c1"}, + {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:df73d1c7f0c50c0f8061cd349968fd9dcc6a9e7592d1c834fa898f9c98f8dd7e"}, + {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f41eeae98f15af0a4ba2a92bce11d8505b612012af664a7634bbfdba7096f5fc"}, + {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ce9bb622e9f1162cafd033071b32ac495c5e8d5863fca2a5144c092a0f129a5b"}, + {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dd92e0ff9edee6af960d9862a3e519d651e6344321fd280fb082654fc96ecc4d"}, + {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aefa8ac8c8fc8ad93365477baef2125dbfd7235880a9c47dca2c46a0af49ef7"}, + {file = "mmh3-4.0.1-cp310-cp310-win32.whl", hash = "sha256:a076ea30ec279a63f44f4c203e4547b5710d00581165fed12583d2017139468d"}, + {file = "mmh3-4.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5aa1e87e448ee1ffa3737b72f2fe3f5960159ab75bbac2f49dca6fb9797132f6"}, + {file = "mmh3-4.0.1-cp310-cp310-win_arm64.whl", hash = "sha256:45155ff2f291c3a1503d1c93e539ab025a13fd8b3f2868650140702b8bd7bfc2"}, + {file = "mmh3-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:91f81d6dd4d0c3b4235b4a58a545493c946669c751a2e0f15084171dc2d81fee"}, + {file = "mmh3-4.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbfddaf55207798f5b29341e5b3a24dbff91711c51b1665eabc9d910255a78f0"}, + {file = "mmh3-4.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0deb8e19121c0896fdc709209aceda30a367cda47f4a884fcbe56223dbf9e867"}, + {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df468ac7b61ec7251d7499e27102899ca39d87686f659baf47f84323f8f4541f"}, + {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84936c113814c6ef3bc4bd3d54f538d7ba312d1d0c2441ac35fdd7d5221c60f6"}, + {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b1df3cf5ce5786aa093f45462118d87ff485f0d69699cdc34f6289b1e833632"}, + {file = 
"mmh3-4.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da281aa740aa9e7f9bebb879c1de0ea9366687ece5930f9f5027e7c87d018153"}, + {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ec380933a56eb9fea16d7fcd49f1b5a5c92d7d2b86f25e9a845b72758ee8c42"}, + {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2fa905fcec8a30e1c0ef522afae1d6170c4f08e6a88010a582f67c59209fb7c7"}, + {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9b23a06315a65ef0b78da0be32409cfce0d6d83e51d70dcebd3302a61e4d34ce"}, + {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:36c27089b12026db14be594d750f7ea6d5d785713b40a971b063f033f5354a74"}, + {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6338341ae6fa5eaa46f69ed9ac3e34e8eecad187b211a6e552e0d8128c568eb1"}, + {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1aece29e27d0c8fb489d00bb712fba18b4dd10e39c9aec2e216c779ae6400b8f"}, + {file = "mmh3-4.0.1-cp311-cp311-win32.whl", hash = "sha256:2733e2160c142eed359e25e5529915964a693f0d043165b53933f904a731c1b3"}, + {file = "mmh3-4.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:09f9f643e0b7f8d98473efdfcdb155105824a38a1ada374625b84c1208197a9b"}, + {file = "mmh3-4.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:d93422f38bc9c4d808c5438a011b769935a87df92ce277e9e22b6ec0ae8ed2e2"}, + {file = "mmh3-4.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:41013c033dc446d3bfb573621b8b53223adcfcf07be1da0bcbe166d930276882"}, + {file = "mmh3-4.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be46540eac024dd8d9b82899d35b2f23592d3d3850845aba6f10e6127d93246b"}, + {file = "mmh3-4.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0e64114b30c6c1e30f8201433b5fa6108a74a5d6f1a14af1b041360c0dd056aa"}, + {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:275637ecca755565e3b0505d3ecf8e1e0a51eb6a3cbe6e212ed40943f92f98cd"}, + {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:955178c8e8d3bc9ad18eab443af670cd13fe18a6b2dba16db2a2a0632be8a133"}, + {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:750afe0477e0c17904611045ad311ff10bc6c2ec5f5ddc5dd949a2b9bf71d5d5"}, + {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b7c18c35e9d6a59d6c5f94a6576f800ff2b500e41cd152ecfc7bb4330f32ba2"}, + {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b8635b1fc6b25d93458472c5d682a1a4b9e6c53e7f4ca75d2bf2a18fa9363ae"}, + {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:057b8de47adee8ad0f2e194ffa445b9845263c1c367ddb335e9ae19c011b25cc"}, + {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:78c0ee0197cfc912f57172aa16e784ad55b533e2e2e91b3a65188cc66fbb1b6e"}, + {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:d6acb15137467592691e41e6f897db1d2823ff3283111e316aa931ac0b5a5709"}, + {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:f91b2598e1f25e013da070ff641a29ebda76292d3a7bdd20ef1736e9baf0de67"}, + {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a78f6f2592395321e2f0dc6b618773398b2c9b15becb419364e0960df53e9f04"}, + {file = 
"mmh3-4.0.1-cp38-cp38-win32.whl", hash = "sha256:d8650982d0b70af24700bd32b15fab33bb3ef9be4af411100f4960a938b0dd0f"}, + {file = "mmh3-4.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:2489949c7261870a02eeaa2ec7b966881c1775df847c8ce6ea4de3e9d96b5f4f"}, + {file = "mmh3-4.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:dcd03a4bb0fa3db03648d26fb221768862f089b6aec5272f0df782a8b4fe5b5b"}, + {file = "mmh3-4.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3775fb0cc675977e5b506b12b8f23cd220be3d4c2d4db7df81f03c9f61baa4cc"}, + {file = "mmh3-4.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f250f78328d41cdf73d3ad9809359636f4fb7a846d7a6586e1a0f0d2f5f2590"}, + {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4161009c9077d5ebf8b472dbf0f41b9139b3d380e0bbe71bf9b503efb2965584"}, + {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cf986ebf530717fefeee8d0decbf3f359812caebba985e2c8885c0ce7c2ee4e"}, + {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b55741ed51e928b1eec94a119e003fa3bc0139f4f9802e19bea3af03f7dd55a"}, + {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8250375641b8c5ce5d56a00c6bb29f583516389b8bde0023181d5eba8aa4119"}, + {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29373e802bc094ffd490e39047bac372ac893c0f411dac3223ef11775e34acd0"}, + {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:071ba41e56f5c385d13ee84b288ccaf46b70cd9e9a6d8cbcbe0964dee68c0019"}, + {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:909e0b88d2c6285481fa6895c2a0faf6384e1b0093f72791aa57d1e04f4adc65"}, + {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:51d356f4380f9d9c2a0612156c3d1e7359933991e84a19304440aa04fd723e68"}, + {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c4b2549949efa63d8decb6572f7e75fad4f2375d52fafced674323239dd9812d"}, + {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9bcc7b32a89c4e5c6fdef97d82e8087ba26a20c25b4aaf0723abd0b302525934"}, + {file = "mmh3-4.0.1-cp39-cp39-win32.whl", hash = "sha256:8edee21ae4f4337fb970810ef5a263e5d2212b85daca0d39daf995e13380e908"}, + {file = "mmh3-4.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8cbb6f90f08952fcc90dbf08f0310fdf4d61096c5cb7db8adf03e23f3b857ae5"}, + {file = "mmh3-4.0.1-cp39-cp39-win_arm64.whl", hash = "sha256:ce71856cbca9d7c74d084eeee1bc5b126ed197c1c9530a4fdb994d099b9bc4db"}, + {file = "mmh3-4.0.1.tar.gz", hash = "sha256:ad8be695dc4e44a79631748ba5562d803f0ac42d36a6b97a53aca84a70809385"}, +] [package.extras] test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] @@ -3184,17 +4856,23 @@ test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] name = "more-itertools" version = "10.1.0" description = "More routines for operating on iterables, beyond itertools" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, + {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, +] [[package]] name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" -category = "main" optional = true python-versions = "*" +files = 
[ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] @@ -3206,9 +4884,12 @@ tests = ["pytest (>=4.6)"] name = "msal" version = "1.23.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." -category = "main" optional = true python-versions = "*" +files = [ + {file = "msal-1.23.0-py2.py3-none-any.whl", hash = "sha256:3342e0837a047007f9d479e814b559c3219767453d57920dc40a31986862048b"}, + {file = "msal-1.23.0.tar.gz", hash = "sha256:25c9a33acf84301f93d1fdbe9f1a9c60cd38af0d5fffdbfa378138fc7bc1e86b"}, +] [package.dependencies] cryptography = ">=0.6,<44" @@ -3222,9 +4903,12 @@ broker = ["pymsalruntime (>=0.13.2,<0.14)"] name = "msal-extensions" version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." -category = "main" optional = true python-versions = "*" +files = [ + {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, + {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, +] [package.dependencies] msal = ">=0.4.1,<2.0.0" @@ -3237,3720 +4921,9 @@ portalocker = [ name = "msgpack" version = "1.0.5" description = "MessagePack serializer" -category = "main" optional = false python-versions = "*" - -[[package]] -name = "multidict" -version = "6.0.4" -description = "multidict implementation" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mypy" -version = "1.6.1" -description = "Optional static typing for Python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mypy-extensions = ">=1.0.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=4.1.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -install-types = ["pip"] -reports = ["lxml"] - -[[package]] -name = "mypy-boto3-athena" -version = "1.28.36" -description = "Type annotations for boto3.Athena 1.28.36 service generated with mypy-boto3-builder 7.18.0" -category = "main" -optional = true -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} - -[[package]] -name = "mypy-boto3-glue" -version = "1.28.36" -description = "Type annotations for boto3.Glue 1.28.36 service generated with mypy-boto3-builder 7.18.0" -category = "main" -optional = true -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} - -[[package]] -name = "mypy-boto3-lakeformation" -version = "1.28.36" -description = "Type annotations for boto3.LakeFormation 1.28.36 service generated with mypy-boto3-builder 7.18.0" -category = "main" -optional = true -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = 
">=4.1.0", markers = "python_version < \"3.12\""} - -[[package]] -name = "mypy-boto3-sts" -version = "1.28.37" -description = "Type annotations for boto3.STS 1.28.37 service generated with mypy-boto3-builder 7.18.2" -category = "main" -optional = true -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "natsort" -version = "8.4.0" -description = "Simple yet flexible natural sorting in Python." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -fast = ["fastnumbers (>=2.0.0)"] -icu = ["PyICU (>=1.0.0)"] - -[[package]] -name = "networkx" -version = "2.8.8" -description = "Python package for creating and manipulating graphs and networks" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] -developer = ["mypy (>=0.982)", "pre-commit (>=2.20)"] -doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.2)", "pydata-sphinx-theme (>=0.11)", "sphinx (>=5.2)", "sphinx-gallery (>=0.11)", "texext (>=0.6.6)"] -extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"] -test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] - -[[package]] -name = "nr-date" -version = "2.1.0" -description = "" -category = "dev" -optional = false -python-versions = ">=3.6,<4.0" - -[[package]] -name = "nr-stream" -version = "1.1.5" -description = "" -category = "dev" -optional = false -python-versions = ">=3.6,<4.0" - -[[package]] -name = "nr-util" -version = "0.8.12" -description = "General purpose Python utility library." 
-category = "dev" -optional = false -python-versions = ">=3.7,<4.0" - -[package.dependencies] -deprecated = ">=1.2.0,<2.0.0" -typing-extensions = ">=3.0.0" - -[[package]] -name = "numpy" -version = "1.24.4" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "numpy" -version = "1.26.1" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = "<3.13,>=3.9" - -[[package]] -name = "oauthlib" -version = "3.2.2" -description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -rsa = ["cryptography (>=3.0.0)"] -signals = ["blinker (>=1.4.0)"] -signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] - -[[package]] -name = "onnx" -version = "1.15.0" -description = "Open Neural Network Exchange" -category = "main" -optional = true -python-versions = ">=3.8" - -[package.dependencies] -numpy = "*" -protobuf = ">=3.20.2" - -[package.extras] -reference = ["Pillow", "google-re2"] - -[[package]] -name = "onnxruntime" -version = "1.16.1" -description = "ONNX Runtime is a runtime accelerator for Machine Learning models" -category = "main" -optional = true -python-versions = "*" - -[package.dependencies] -coloredlogs = "*" -flatbuffers = "*" -numpy = ">=1.21.6" -packaging = "*" -protobuf = "*" -sympy = "*" - -[[package]] -name = "openpyxl" -version = "3.1.2" -description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "main" -optional = true -python-versions = ">=3.6" - -[package.dependencies] -et-xmlfile = "*" - -[[package]] -name = "opentelemetry-api" -version = "1.15.0" -description = "OpenTelemetry Python API" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -deprecated = ">=1.2.6" -setuptools = ">=16.0" - -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.15.0" -description = "OpenTelemetry Collector Exporters" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.15.0" -opentelemetry-exporter-otlp-proto-http = "1.15.0" - -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.15.0" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} -googleapis-common-protos = ">=1.52,<2.0" -grpcio = ">=1.0.0,<2.0.0" -opentelemetry-api = ">=1.12,<2.0" -opentelemetry-proto = "1.15.0" -opentelemetry-sdk = ">=1.12,<2.0" - -[package.extras] -test = ["pytest-grpc"] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-http" -version = "1.15.0" -description = "OpenTelemetry Collector Protobuf over HTTP Exporter" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} -googleapis-common-protos = ">=1.52,<2.0" -opentelemetry-api = ">=1.12,<2.0" -opentelemetry-proto = "1.15.0" -opentelemetry-sdk = ">=1.12,<2.0" -requests = ">=2.7,<3.0" - -[package.extras] -test = ["responses (==0.22.0)"] - -[[package]] -name = "opentelemetry-proto" -version = "1.15.0" -description = "OpenTelemetry Python Proto" -category = "dev" -optional = false -python-versions = ">=3.7" - 
-[package.dependencies] -protobuf = ">=3.19,<5.0" - -[[package]] -name = "opentelemetry-sdk" -version = "1.15.0" -description = "OpenTelemetry Python SDK" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -opentelemetry-api = "1.15.0" -opentelemetry-semantic-conventions = "0.36b0" -setuptools = ">=16.0" -typing-extensions = ">=3.7.4" - -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.36b0" -description = "OpenTelemetry Semantic Conventions" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "ordered-set" -version = "4.1.0" -description = "An OrderedSet is a custom MutableSet that remembers its order, so that every" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -dev = ["black", "mypy", "pytest"] - -[[package]] -name = "orjson" -version = "3.9.5" -description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "packaging" -version = "23.1" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pandas" -version = "2.0.3" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] -aws = ["s3fs (>=2021.08.0)"] -clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] -compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] -computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2021.07.0)"] -gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] -hdf5 = ["tables (>=3.6.1)"] -html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] -mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] -spss = ["pyreadstat (>=1.1.2)"] -sql-other = ["SQLAlchemy (>=1.4.16)"] -test = ["hypothesis 
(>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.6.3)"] - -[[package]] -name = "parsedatetime" -version = "2.4" -description = "Parse human-readable date/time text." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -future = "*" - -[[package]] -name = "pathspec" -version = "0.11.2" -description = "Utility library for gitignore style pattern matching of file paths." -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pathvalidate" -version = "3.1.0" -description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] -test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.2)", "pytest-md-report (>=0.3)"] - -[[package]] -name = "pbr" -version = "5.11.1" -description = "Python Build Reasonableness" -category = "dev" -optional = false -python-versions = ">=2.6" - -[[package]] -name = "pendulum" -version = "2.1.2" -description = "Python datetimes made easy" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.dependencies] -python-dateutil = ">=2.6,<3.0" -pytzdata = ">=2020.1" - -[[package]] -name = "pipdeptree" -version = "2.9.6" -description = "Command line utility to show dependency tree of packages." -category = "main" -optional = true -python-versions = ">=3.7" - -[package.extras] -graphviz = ["graphviz (>=0.20.1)"] -test = ["covdefaults (>=2.3)", "diff-cover (>=7.6)", "pip (>=23.1.2)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "virtualenv (>=20.23.1,<21)"] - -[[package]] -name = "pkgutil-resolve-name" -version = "1.3.10" -description = "Resolve a name to an object." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "platformdirs" -version = "3.8.1" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] - -[[package]] -name = "pluggy" -version = "1.3.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "ply" -version = "3.11" -description = "Python Lex & Yacc" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "portalocker" -version = "2.7.0" -description = "Wraps the portalocker recipe for easy usage" -category = "main" -optional = true -python-versions = ">=3.5" - -[package.dependencies] -pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} - -[package.extras] -docs = ["sphinx (>=1.7.1)"] -redis = ["redis"] -tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"] - -[[package]] -name = "prefixed" -version = "0.7.0" -description = "Prefixed alternative numeric library" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "prison" -version = "0.2.1" -description = "Rison encoder/decoder" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -six = "*" - -[package.extras] -dev = ["nose", "pipreqs", "twine"] - -[[package]] -name = "proto-plus" -version = "1.22.3" -description = "Beautiful, Pythonic protocol buffers." -category = "main" -optional = true -python-versions = ">=3.6" - -[package.dependencies] -protobuf = ">=3.19.0,<5.0.0dev" - -[package.extras] -testing = ["google-api-core[grpc] (>=1.31.5)"] - -[[package]] -name = "protobuf" -version = "4.24.2" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "psutil" -version = "5.9.5" -description = "Cross-platform lib for process and system monitoring in Python." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] - -[[package]] -name = "psycopg2-binary" -version = "2.9.7" -description = "psycopg2 - Python-PostgreSQL Database Adapter" -category = "main" -optional = true -python-versions = ">=3.6" - -[[package]] -name = "psycopg2cffi" -version = "2.9.0" -description = ".. 
image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" -category = "main" -optional = true -python-versions = "*" - -[package.dependencies] -cffi = ">=1.0" -six = "*" - -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "pyarrow" -version = "14.0.1" -description = "Python library for Apache Arrow" -category = "main" -optional = true -python-versions = ">=3.8" - -[package.dependencies] -numpy = ">=1.16.6" - -[[package]] -name = "pyasn1" -version = "0.5.0" -description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[[package]] -name = "pyasn1-modules" -version = "0.3.0" -description = "A collection of ASN.1-based protocols modules" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[package.dependencies] -pyasn1 = ">=0.4.6,<0.6.0" - -[[package]] -name = "pyathena" -version = "3.0.6" -description = "Python DB API 2.0 (PEP 249) client for Amazon Athena" -category = "main" -optional = true -python-versions = ">=3.8.1" - -[package.dependencies] -boto3 = ">=1.26.4" -botocore = ">=1.29.4" -fsspec = "*" -tenacity = ">=4.1.0" - -[package.extras] -arrow = ["pyarrow (>=7.0.0)"] -fastparquet = ["fastparquet (>=0.4.0)"] -pandas = ["pandas (>=1.3.0)"] -sqlalchemy = ["sqlalchemy (>=1.0.0)"] - -[[package]] -name = "pycodestyle" -version = "2.9.1" -description = "Python style guide checker" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pydantic" -version = "2.5.0" -description = "Data validation using Python type hints" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.14.1" -typing-extensions = ">=4.6.1" - -[package.extras] -email = ["email-validator (>=2.0.0)"] - -[[package]] -name = "pydantic-core" -version = "2.14.1" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" - -[[package]] -name = "pydoc-markdown" -version = "4.8.2" -description = "Create Python API documentation in Markdown format." -category = "dev" -optional = false -python-versions = ">=3.7,<4.0" - -[package.dependencies] -click = ">=7.1,<9.0" -"databind.core" = ">=4.4.0,<5.0.0" -"databind.json" = ">=4.4.0,<5.0.0" -docspec = ">=2.2.1,<3.0.0" -docspec-python = ">=2.2.1,<3.0.0" -docstring-parser = ">=0.11,<0.12" -jinja2 = ">=3.0.0,<4.0.0" -"nr.util" = ">=0.7.5,<1.0.0" -PyYAML = ">=5.0,<7.0" -requests = ">=2.23.0,<3.0.0" -tomli = ">=2.0.0,<3.0.0" -tomli_w = ">=1.0.0,<2.0.0" -watchdog = "*" -yapf = ">=0.30.0" - -[[package]] -name = "pyflakes" -version = "2.5.0" -description = "passive checker of Python programs" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "pygments" -version = "2.16.1" -description = "Pygments is a syntax highlighting package written in Python." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pyjwt" -version = "2.8.0" -description = "JSON Web Token implementation in Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} - -[package.extras] -crypto = ["cryptography (>=3.4.0)"] -dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] - -[[package]] -name = "pymongo" -version = "4.6.0" -description = "Python driver for MongoDB " -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -dnspython = ">=1.16.0,<3.0.0" - -[package.extras] -aws = ["pymongo-auth-aws (<2.0.0)"] -encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"] -gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] -ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] -snappy = ["python-snappy"] -test = ["pytest (>=7)"] -zstd = ["zstandard"] - -[[package]] -name = "pymysql" -version = "1.1.0" -description = "Pure Python MySQL Driver" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -ed25519 = ["PyNaCl (>=1.4.0)"] -rsa = ["cryptography"] - -[[package]] -name = "pyodbc" -version = "4.0.39" -description = "DB API Module for ODBC" -category = "main" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[[package]] -name = "pyopenssl" -version = "23.2.0" -description = "Python wrapper module around the OpenSSL library" -category = "main" -optional = true -python-versions = ">=3.6" - -[package.dependencies] -cryptography = ">=38.0.0,<40.0.0 || >40.0.0,<40.0.1 || >40.0.1,<42" - -[package.extras] -docs = ["sphinx (!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"] -test = ["flaky", "pretend", "pytest (>=3.0.1)"] - -[[package]] -name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pypdf2" -version = "3.0.1" -description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} - -[package.extras] -crypto = ["PyCryptodome"] -dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] -docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] -full = ["Pillow", "PyCryptodome"] -image = ["Pillow"] - -[[package]] -name = "pyreadline3" -version = "3.4.1" -description = "A python implementation of GNU readline." 
-category = "main" -optional = true -python-versions = "*" - -[[package]] -name = "pytest" -version = "7.4.4" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "pytest-asyncio" -version = "0.23.5" -description = "Pytest support for asyncio" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pytest = ">=7.0.0,<9" - -[package.extras] -docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] -testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] - -[[package]] -name = "pytest-cases" -version = "3.6.14" -description = "Separate test code from test cases in pytest." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -decopatch = "*" -makefun = ">=1.9.5" - -[[package]] -name = "pytest-console-scripts" -version = "1.4.1" -description = "Pytest plugin for testing console scripts" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} -pytest = ">=4.0.0" - -[[package]] -name = "pytest-forked" -version = "1.6.0" -description = "run tests in isolated forked subprocesses" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -py = "*" -pytest = ">=3.10" - -[[package]] -name = "pytest-order" -version = "1.1.0" -description = "pytest plugin to run your tests in a specific order" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -pytest = [ - {version = ">=5.0", markers = "python_version < \"3.10\""}, - {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, -] - -[[package]] -name = "python-daemon" -version = "3.0.1" -description = "Library to implement a well-behaved Unix daemon process." 
-category = "dev" -optional = false -python-versions = ">=3" - -[package.dependencies] -docutils = "*" -lockfile = ">=0.10" -setuptools = ">=62.4.0" - -[package.extras] -devel = ["coverage", "docutils", "isort", "testscenarios (>=0.4)", "testtools", "twine"] -test = ["coverage", "docutils", "testscenarios (>=0.4)", "testtools"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-nvd3" -version = "0.15.0" -description = "Python NVD3 - Chart Library for d3.js" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -Jinja2 = ">=2.8" -python-slugify = ">=1.2.5" - -[[package]] -name = "python-slugify" -version = "8.0.1" -description = "A Python slugify application that also handles Unicode" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -text-unidecode = ">=1.3" - -[package.extras] -unidecode = ["Unidecode (>=1.1.1)"] - -[[package]] -name = "pytimeparse" -version = "1.1.8" -description = "Time expression parser" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pytz" -version = "2023.3" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pytzdata" -version = "2020.1" -description = "The Olson timezone database for Python." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pywin32" -version = "306" -description = "Python for Window Extensions" -category = "main" -optional = true -python-versions = "*" - -[[package]] -name = "pywin32-ctypes" -version = "0.2.2" -description = "A (partial) reimplementation of pywin32 using ctypes/cffi" -category = "main" -optional = true -python-versions = ">=3.6" - -[[package]] -name = "pyyaml" -version = "6.0.1" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "qdrant-client" -version = "1.6.4" -description = "Client library for the Qdrant vector search engine" -category = "main" -optional = true -python-versions = ">=3.8,<3.13" - -[package.dependencies] -fastembed = {version = "0.1.1", optional = true, markers = "python_version < \"3.12\" and extra == \"fastembed\""} -grpcio = ">=1.41.0" -grpcio-tools = ">=1.41.0" -httpx = {version = ">=0.14.0", extras = ["http2"]} -numpy = [ - {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""}, - {version = ">=1.26", markers = "python_version >= \"3.12\""}, -] -portalocker = ">=2.7.0,<3.0.0" -pydantic = ">=1.10.8" -urllib3 = ">=1.26.14,<2.0.0" - -[package.extras] -fastembed = ["fastembed (==0.1.1)"] - -[[package]] -name = "redshift-connector" -version = "2.0.915" -description = "Redshift interface library" -category = "main" -optional = true -python-versions = ">=3.6" - -[package.dependencies] -beautifulsoup4 = ">=4.7.0,<5.0.0" -boto3 = ">=1.9.201,<2.0.0" -botocore = ">=1.12.201,<2.0.0" -lxml = ">=4.6.5" -packaging = "*" -pytz = ">=2020.1" -requests = ">=2.23.0,<3.0.0" -scramp = ">=1.2.0,<1.5.0" -setuptools = "*" - -[package.extras] -full = ["numpy", "pandas"] - -[[package]] -name = "referencing" -version = "0.30.2" -description = "JSON Referencing + Python" -category = "main" -optional = false 
-python-versions = ">=3.8" - -[package.dependencies] -attrs = ">=22.2.0" -rpds-py = ">=0.7.0" - -[[package]] -name = "regex" -version = "2023.8.8" -description = "Alternative regular expression module, to replace re." -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "requests" -version = "2.31.0" -description = "Python HTTP for Humans." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "requests-mock" -version = "1.11.0" -description = "Mock out responses from the requests package" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -requests = ">=2.3,<3" -six = "*" - -[package.extras] -fixture = ["fixtures"] -test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] - -[[package]] -name = "requests-oauthlib" -version = "1.3.1" -description = "OAuthlib authentication support for Requests." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -oauthlib = ">=3.0.0" -requests = ">=2.0.0" - -[package.extras] -rsa = ["oauthlib[signedtoken] (>=3.0.0)"] - -[[package]] -name = "requests-toolbelt" -version = "1.0.0" -description = "A utility belt for advanced users of python-requests" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -requests = ">=2.0.1,<3.0.0" - -[[package]] -name = "requirements-parser" -version = "0.5.0" -description = "This is a small Python module for parsing Pip requirement files." 
-category = "main" -optional = false -python-versions = ">=3.6,<4.0" - -[package.dependencies] -types-setuptools = ">=57.0.0" - -[[package]] -name = "rfc3339-validator" -version = "0.1.4" -description = "A pure python RFC3339 validator" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.dependencies] -six = "*" - -[[package]] -name = "rich" -version = "13.5.2" -description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -markdown-it-py = ">=2.2.0" -pygments = ">=2.13.0,<3.0.0" -typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} - -[package.extras] -jupyter = ["ipywidgets (>=7.5.1,<9)"] - -[[package]] -name = "rich-argparse" -version = "1.3.0" -description = "Rich help formatters for argparse and optparse" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -rich = ">=11.0.0" - -[[package]] -name = "rpds-py" -version = "0.10.0" -description = "Python bindings to Rust's persistent data structures (rpds)" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "rsa" -version = "4.9" -description = "Pure-Python RSA implementation" -category = "main" -optional = false -python-versions = ">=3.6,<4" - -[package.dependencies] -pyasn1 = ">=0.1.3" - -[[package]] -name = "s3fs" -version = "2023.6.0" -description = "Convenient Filesystem interface over S3" -category = "main" -optional = true -python-versions = ">= 3.8" - -[package.dependencies] -aiobotocore = ">=2.5.0,<2.6.0" -aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2023.6.0" - -[package.extras] -awscli = ["aiobotocore[awscli] (>=2.5.0,<2.6.0)"] -boto3 = ["aiobotocore[boto3] (>=2.5.0,<2.6.0)"] - -[[package]] -name = "s3transfer" -version = "0.6.2" -description = "An Amazon S3 Transfer Manager" -category = "main" -optional = true -python-versions = ">= 3.7" - -[package.dependencies] -botocore = ">=1.12.36,<2.0a.0" - -[package.extras] -crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] - -[[package]] -name = "scramp" -version = "1.4.4" -description = "An implementation of the SCRAM protocol." 
-category = "main" -optional = true -python-versions = ">=3.7" - -[package.dependencies] -asn1crypto = ">=1.5.1" - -[[package]] -name = "secretstorage" -version = "3.3.3" -description = "Python bindings to FreeDesktop.org Secret Service API" -category = "main" -optional = true -python-versions = ">=3.6" - -[package.dependencies] -cryptography = ">=2.0" -jeepney = ">=0.6" - -[[package]] -name = "semver" -version = "3.0.1" -description = "Python helper for Semantic Versioning (https://semver.org)" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "sentry-sdk" -version = "1.30.0" -description = "Python client for Sentry (https://sentry.io)" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -certifi = "*" -urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} - -[package.extras] -aiohttp = ["aiohttp (>=3.5)"] -arq = ["arq (>=0.23)"] -beam = ["apache-beam (>=2.12)"] -bottle = ["bottle (>=0.12.13)"] -celery = ["celery (>=3)"] -chalice = ["chalice (>=1.16.0)"] -django = ["django (>=1.8)"] -falcon = ["falcon (>=1.4)"] -fastapi = ["fastapi (>=0.79.0)"] -flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] -grpcio = ["grpcio (>=1.21.1)"] -httpx = ["httpx (>=0.16.0)"] -huey = ["huey (>=2)"] -loguru = ["loguru (>=0.5)"] -opentelemetry = ["opentelemetry-distro (>=0.35b0)"] -opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] -pure-eval = ["asttokens", "executing", "pure-eval"] -pymongo = ["pymongo (>=3.1)"] -pyspark = ["pyspark (>=2.4.4)"] -quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] -rq = ["rq (>=0.6)"] -sanic = ["sanic (>=0.8)"] -sqlalchemy = ["sqlalchemy (>=1.2)"] -starlette = ["starlette (>=0.19.1)"] -starlite = ["starlite (>=1.48)"] -tornado = ["tornado (>=5)"] - -[[package]] -name = "setproctitle" -version = "1.3.2" -description = "A Python module to customize the process title" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest"] - -[[package]] -name = "setuptools" -version = "68.1.2" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", 
"pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] - -[[package]] -name = "simplejson" -version = "3.19.1" -description = "Simple, fast, extensible JSON encoder/decoder for Python" -category = "main" -optional = false -python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "smmap" -version = "5.0.0" -description = "A pure Python implementation of a sliding window memory map manager" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "sniffio" -version = "1.3.0" -description = "Sniff out which async library your code is running under" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "snowflake-connector-python" -version = "3.5.0" -description = "Snowflake Connector for Python" -category = "main" -optional = true -python-versions = ">=3.8" - -[package.dependencies] -asn1crypto = ">0.24.0,<2.0.0" -certifi = ">=2017.4.17" -cffi = ">=1.9,<2.0.0" -charset-normalizer = ">=2,<4" -cryptography = ">=3.1.0,<42.0.0" -filelock = ">=3.5,<4" -idna = ">=2.5,<4" -keyring = {version = "<16.1.0 || >16.1.0,<25.0.0", optional = true, markers = "extra == \"secure-local-storage\""} -packaging = "*" -platformdirs = ">=2.6.0,<4.0.0" -pyjwt = "<3.0.0" -pyOpenSSL = ">=16.2.0,<24.0.0" -pytz = "*" -requests = "<3.0.0" -sortedcontainers = ">=2.4.0" -tomlkit = "*" -typing-extensions = ">=4.3,<5" -urllib3 = ">=1.21.1,<2.0.0" - -[package.extras] -development = ["Cython", "coverage", "more-itertools", "numpy (<1.27.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<7.5.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytzdata"] -pandas = ["pandas (>=1.0.0,<2.1.0)", "pyarrow"] -secure-local-storage = ["keyring (!=16.1.0,<25.0.0)"] - -[[package]] -name = "sortedcontainers" -version = "2.4.0" -description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -category = "main" -optional = true -python-versions = "*" - -[[package]] -name = "soupsieve" -version = "2.5" -description = "A modern CSS selector implementation for Beautiful Soup." 
-category = "main" -optional = true -python-versions = ">=3.8" - -[[package]] -name = "sqlalchemy" -version = "1.4.49" -description = "Database Abstraction Library" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" - -[package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} - -[package.extras] -aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] -asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] -mssql = ["pyodbc"] -mssql-pymssql = ["pymssql"] -mssql-pyodbc = ["pyodbc"] -mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] -mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] -mysql-connector = ["mysql-connector-python"] -oracle = ["cx_oracle (>=7)", "cx_oracle (>=7,<8)"] -postgresql = ["psycopg2 (>=2.7)"] -postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] -postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] -postgresql-psycopg2binary = ["psycopg2-binary"] -postgresql-psycopg2cffi = ["psycopg2cffi"] -pymysql = ["pymysql", "pymysql (<1)"] -sqlcipher = ["sqlcipher3_binary"] - -[[package]] -name = "sqlalchemy-jsonfield" -version = "1.0.1.post0" -description = "SQLALchemy JSONField implementation for storing dicts at SQL" -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -sqlalchemy = "*" - -[[package]] -name = "sqlalchemy-utils" -version = "0.41.1" -description = "Various utility functions for SQLAlchemy." 
-category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -SQLAlchemy = ">=1.3" - -[package.extras] -arrow = ["arrow (>=0.3.4)"] -babel = ["Babel (>=1.3)"] -color = ["colour (>=0.0.4)"] -encrypted = ["cryptography (>=0.6)"] -intervals = ["intervals (>=0.7.1)"] -password = ["passlib (>=1.6,<2.0)"] -pendulum = ["pendulum (>=2.0.5)"] -phone = ["phonenumbers (>=5.9.2)"] -test = ["Jinja2 (>=2.3)", "Pygments (>=1.2)", "backports.zoneinfo", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "isort (>=4.2.2)", "pg8000 (>=1.12.4)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (>=2.7.1)", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] -test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3.4)", "backports.zoneinfo", "colour (>=0.0.4)", "cryptography (>=0.6)", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "furl (>=0.4.1)", "intervals (>=0.7.1)", "isort (>=4.2.2)", "passlib (>=1.6,<2.0)", "pendulum (>=2.0.5)", "pg8000 (>=1.12.4)", "phonenumbers (>=5.9.2)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (>=2.7.1)", "python-dateutil", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] -timezone = ["python-dateutil"] -url = ["furl (>=0.4.1)"] - -[[package]] -name = "sqlfluff" -version = "2.3.2" -description = "The SQL Linter for Humans" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -appdirs = "*" -chardet = "*" -click = "*" -colorama = ">=0.3" -diff-cover = ">=2.5.0" -importlib-resources = {version = "*", markers = "python_version < \"3.9\""} -Jinja2 = "*" -pathspec = "*" -pytest = "*" -pyyaml = ">=5.1" -regex = "*" -tblib = "*" -toml = {version = "*", markers = "python_version < \"3.11\""} -tqdm = "*" -typing-extensions = "*" - -[[package]] -name = "sqlparams" -version = "6.0.1" -description = "Convert between various DB API 2.0 parameter styles." -category = "main" -optional = true -python-versions = ">=3.8" - -[[package]] -name = "sqlparse" -version = "0.4.4" -description = "A non-validating SQL parser." -category = "main" -optional = false -python-versions = ">=3.5" - -[package.extras] -dev = ["build", "flake8"] -doc = ["sphinx"] -test = ["pytest", "pytest-cov"] - -[[package]] -name = "stevedore" -version = "5.1.0" -description = "Manage dynamic plugins for Python applications" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pbr = ">=2.0.0,<2.1.0 || >2.1.0" - -[[package]] -name = "sympy" -version = "1.12" -description = "Computer algebra system (CAS) in Python" -category = "main" -optional = true -python-versions = ">=3.8" - -[package.dependencies] -mpmath = ">=0.19" - -[[package]] -name = "tabulate" -version = "0.9.0" -description = "Pretty-print tabular data" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -widechars = ["wcwidth"] - -[[package]] -name = "tblib" -version = "2.0.0" -description = "Traceback serialization library." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tenacity" -version = "8.2.3" -description = "Retry code until it succeeds" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -doc = ["reno", "sphinx", "tornado (>=4.5)"] - -[[package]] -name = "termcolor" -version = "2.3.0" -description = "ANSI color formatting for output in terminal" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -tests = ["pytest", "pytest-cov"] - -[[package]] -name = "text-unidecode" -version = "1.3" -description = "The most basic Text::Unidecode port" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "thrift" -version = "0.16.0" -description = "Python bindings for the Apache Thrift RPC system" -category = "main" -optional = true -python-versions = "*" - -[package.dependencies] -six = ">=1.7.2" - -[package.extras] -all = ["tornado (>=4.0)", "twisted"] -tornado = ["tornado (>=4.0)"] -twisted = ["twisted"] - -[[package]] -name = "tokenizers" -version = "0.13.3" -description = "Fast and Customizable Tokenizers" -category = "main" -optional = true -python-versions = "*" - -[package.extras] -dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] -docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] -testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] - -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -category = "dev" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tomli-w" -version = "1.0.0" -description = "A lil' TOML writer" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tomlkit" -version = "0.12.1" -description = "Style preserving TOML library" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tqdm" -version = "4.66.1" -description = "Fast, Extensible Progress Meter" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "typeapi" -version = "2.1.1" -description = "" -category = "dev" -optional = false -python-versions = ">=3.6.3,<4.0.0" - -[package.dependencies] -typing-extensions = ">=3.0.0" - -[[package]] -name = "types-awscrt" -version = "0.19.1" -description = "Type annotations and code completion for awscrt" -category = "main" -optional = false -python-versions = ">=3.7,<4.0" - -[[package]] -name = "types-cachetools" -version = "5.3.0.6" -description = "Typing stubs for cachetools" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-click" -version = "7.1.8" -description = "Typing stubs for click" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-deprecated" -version = "1.2.9.3" -description = "Typing stubs for Deprecated" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-protobuf" -version = "4.24.0.1" -description = "Typing stubs for protobuf" -category = "dev" -optional = false -python-versions 
= "*" - -[[package]] -name = "types-psutil" -version = "5.9.5.16" -description = "Typing stubs for psutil" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-psycopg2" -version = "2.9.21.14" -description = "Typing stubs for psycopg2" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-python-dateutil" -version = "2.8.19.14" -description = "Typing stubs for python-dateutil" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-pyyaml" -version = "6.0.12.11" -description = "Typing stubs for PyYAML" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-requests" -version = "2.31.0.2" -description = "Typing stubs for requests" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -types-urllib3 = "*" - -[[package]] -name = "types-s3transfer" -version = "0.6.2" -description = "Type annotations and code completion for s3transfer" -category = "main" -optional = false -python-versions = ">=3.7,<4.0" - -[[package]] -name = "types-setuptools" -version = "68.1.0.1" -description = "Typing stubs for setuptools" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "types-simplejson" -version = "3.19.0.2" -description = "Typing stubs for simplejson" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-sqlalchemy" -version = "1.4.53.38" -description = "Typing stubs for SQLAlchemy" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-tqdm" -version = "4.66.0.2" -description = "Typing stubs for tqdm" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "typing-extensions" -version = "4.7.1" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -category = "main" -optional = false -python-versions = ">=2" - -[[package]] -name = "uc-micro-py" -version = "1.0.2" -description = "Micro subset of unicode data files for linkify-it-py projects." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["coverage", "pytest", "pytest-cov"] - -[[package]] -name = "unicodecsv" -version = "0.14.1" -description = "Python2's stdlib csv module is nice, but it doesn't support unicode. This module is a drop-in replacement which *does*." -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "urllib3" -version = "1.26.16" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "validators" -version = "0.21.0" -description = "Python Data Validation for Humans™" -category = "main" -optional = true -python-versions = ">=3.8,<4.0" - -[[package]] -name = "watchdog" -version = "3.0.0" -description = "Filesystem events monitoring" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -watchmedo = ["PyYAML (>=3.10)"] - -[[package]] -name = "wcwidth" -version = "0.2.6" -description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "weaviate-client" -version = "3.23.2" -description = "A python native Weaviate client" -category = "main" -optional = true -python-versions = ">=3.8" - -[package.dependencies] -authlib = ">=1.1.0" -requests = ">=2.28.0,<=2.31.0" -tqdm = ">=4.59.0,<5.0.0" -validators = ">=0.18.2,<=0.21.0" - -[package.extras] -grpc = ["grpcio", "grpcio-tools"] - -[[package]] -name = "werkzeug" -version = "2.3.7" -description = "The comprehensive WSGI web application library." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - -[[package]] -name = "wheel" -version = "0.41.2" -description = "A built-package format for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest (>=6.0.0)", "setuptools (>=65)"] - -[[package]] -name = "win-precise-time" -version = "1.4.2" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "wrapt" -version = "1.15.0" -description = "Module for decorators, wrappers and monkey patching." -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" - -[[package]] -name = "wtforms" -version = "3.0.1" -description = "Form validation and rendering for Python web development." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -MarkupSafe = "*" - -[package.extras] -email = ["email-validator"] - -[[package]] -name = "yapf" -version = "0.33.0" -description = "A formatter for Python code." 
-category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -tomli = ">=2.0.1" - -[[package]] -name = "yarl" -version = "1.9.2" -description = "Yet another URL library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -idna = ">=2.0" -multidict = ">=4.0" - -[[package]] -name = "zipp" -version = "3.16.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] - -[extras] -athena = ["pyathena", "pyarrow", "s3fs", "botocore"] -az = ["adlfs"] -bigquery = ["grpcio", "google-cloud-bigquery", "pyarrow", "gcsfs"] -cli = ["pipdeptree", "cron-descriptor"] -databricks = ["databricks-sql-connector"] -dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"] -duckdb = ["duckdb"] -filesystem = ["s3fs", "botocore"] -gcp = ["grpcio", "google-cloud-bigquery", "gcsfs"] -gs = ["gcsfs"] -motherduck = ["duckdb", "pyarrow"] -mssql = ["pyodbc"] -parquet = ["pyarrow"] -postgres = ["psycopg2-binary", "psycopg2cffi"] -qdrant = ["qdrant-client"] -redshift = ["psycopg2-binary", "psycopg2cffi"] -s3 = ["s3fs", "botocore"] -snowflake = ["snowflake-connector-python"] -synapse = ["pyodbc", "adlfs", "pyarrow"] -weaviate = ["weaviate-client"] - -[metadata] -lock-version = "1.1" -python-versions = ">=3.8.1,<3.13" -content-hash = "2a79839114197182918d1422bc51f037ff53ffb40325f652285a26eb774919d1" - -[metadata.files] -about-time = [ - {file = "about-time-4.2.1.tar.gz", hash = "sha256:6a538862d33ce67d997429d14998310e1dbfda6cb7d9bbfbf799c4709847fece"}, - {file = "about_time-4.2.1-py3-none-any.whl", hash = "sha256:8bbf4c75fe13cbd3d72f49a03b02c5c7dca32169b6d49117c257e7eb3eaee341"}, -] -adlfs = [ - {file = "adlfs-2023.8.0-py3-none-any.whl", hash = "sha256:3eb248a3c2a30b419f1147bd7676d156b5219f96ef7f11d47166afd2a3bdb07e"}, - {file = "adlfs-2023.8.0.tar.gz", hash = "sha256:07e804f6df4593acfcaf01025b162e30ac13e523d3570279c98b2d91a18026d9"}, -] -agate = [ - {file = "agate-1.7.1-py2.py3-none-any.whl", hash = "sha256:23f9f412f74f97b72f82b1525ab235cc816bc8c8525d968a091576a0dbc54a5f"}, - {file = "agate-1.7.1.tar.gz", hash = "sha256:eadf46d980168b8922d5d396d6258eecd5e7dbef7e6f0c0b71e968545ea96389"}, -] -aiobotocore = [ - {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, - {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, -] -aiohttp = [ - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, - {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, - {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, - {file = 
"aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, - {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, - {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, - {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, - {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, - {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, - {file = 
"aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, - {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, - {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, - {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, -] -aioitertools = [ - {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, - {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, -] -aiosignal = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] -alembic = [ - {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"}, - {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"}, -] -alive-progress = [ - {file = "alive-progress-3.1.4.tar.gz", hash = "sha256:74a95d8d0d42bc99d3a3725dbd06ebb852245f1b64e301a7c375b92b22663f7b"}, - {file = "alive_progress-3.1.4-py3-none-any.whl", hash = "sha256:c80ad87ce9c1054b01135a87fae69ecebbfc2107497ae87cbe6aec7e534903db"}, -] -annotated-types = [ - {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, - {file = "annotated_types-0.6.0.tar.gz", hash = 
"sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, -] -ansicon = [ - {file = "ansicon-1.89.0-py2.py3-none-any.whl", hash = "sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec"}, - {file = "ansicon-1.89.0.tar.gz", hash = "sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1"}, -] -anyio = [ - {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, - {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, -] -apache-airflow = [ - {file = "apache-airflow-2.7.2.tar.gz", hash = "sha256:c6fab3449066867d9a7728f40b6b9e27f1ea68bca39b064a27f5c5ddc3262224"}, - {file = "apache_airflow-2.7.2-py3-none-any.whl", hash = "sha256:1bc2c022bcae24b911e49fafd5fb619b49efba87ed7bc8561a2065810d8fe899"}, -] -apache-airflow-providers-common-sql = [ - {file = "apache-airflow-providers-common-sql-1.7.1.tar.gz", hash = "sha256:ba37f795d9656a87cf4661edc381b8ecfe930272c59324b59f8a158fd0971aeb"}, - {file = "apache_airflow_providers_common_sql-1.7.1-py3-none-any.whl", hash = "sha256:36da2f51b51a64765b0ed5e6a5fece8eaa3ca173dfbff803e2fe2a0afbb90944"}, -] -apache-airflow-providers-ftp = [ - {file = "apache-airflow-providers-ftp-3.5.1.tar.gz", hash = "sha256:dc6dc524dc7454857a0812154d7540172e36db3a87e48a4a91918ebf80898bbf"}, - {file = "apache_airflow_providers_ftp-3.5.1-py3-none-any.whl", hash = "sha256:e4ea77d6276355acfe2392c12155db7b9d51be460b7673b616dc1d8bee03c1d7"}, -] -apache-airflow-providers-http = [ - {file = "apache-airflow-providers-http-4.5.1.tar.gz", hash = "sha256:ec90920ff980fc264af9811dc72c37ef272bcdb3d007c7114e12366559426460"}, - {file = "apache_airflow_providers_http-4.5.1-py3-none-any.whl", hash = "sha256:702f26938bc22684eefecd297c2b0809793f9e43b8d911d807a29f21e69da179"}, -] -apache-airflow-providers-imap = [ - {file = "apache-airflow-providers-imap-3.3.1.tar.gz", hash = "sha256:40bac2a75e4dfbcd7d397776d90d03938facaf2707acc6cc119a8db684e53f77"}, - {file = "apache_airflow_providers_imap-3.3.1-py3-none-any.whl", hash = "sha256:adb6ef7864a5a8e245fbbd555bb4ef1eecf5b094d6d23ca0edc5f0aded50490d"}, -] -apache-airflow-providers-sqlite = [ - {file = "apache-airflow-providers-sqlite-3.4.3.tar.gz", hash = "sha256:347d2db03eaa5ea9fef414666565ffa5e849935cbc30e37237edcaa822b5ced8"}, - {file = "apache_airflow_providers_sqlite-3.4.3-py3-none-any.whl", hash = "sha256:4ffa6a50f0ea1b4e51240b657dfec3fb026c87bdfa71af908a56461df6a6f2e0"}, -] -apispec = [ - {file = "apispec-6.3.0-py3-none-any.whl", hash = "sha256:95a0b9355785df998bb0e9b939237a30ee4c7428fd6ef97305eae3da06b9b339"}, - {file = "apispec-6.3.0.tar.gz", hash = "sha256:6cb08d92ce73ff0b3bf46cb2ea5c00d57289b0f279fb0256a3df468182ba5344"}, -] -appdirs = [ - {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, - {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, -] -argcomplete = [ - {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, - {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, -] -asgiref = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = 
"sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, -] -asn1crypto = [ - {file = "asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"}, - {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, -] -astatine = [ - {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, - {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, -] -asttokens = [ - {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, - {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, -] -astunparse = [ - {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, - {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, -] -async-timeout = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] -attrs = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, -] -authlib = [ - {file = "Authlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:c88984ea00149a90e3537c964327da930779afa4564e354edfd98410bea01911"}, - {file = "Authlib-1.2.1.tar.gz", hash = "sha256:421f7c6b468d907ca2d9afede256f068f87e34d23dd221c07d13d4c234726afb"}, -] -azure-core = [ - {file = "azure-core-1.29.3.tar.gz", hash = "sha256:c92700af982e71c8c73de9f4c20da8b3f03ce2c22d13066e4d416b4629c87903"}, - {file = "azure_core-1.29.3-py3-none-any.whl", hash = "sha256:f8b2910f92b66293d93bd00564924ad20ad48f4a1e150577cf18d1e7d4f9263c"}, -] -azure-datalake-store = [ - {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, - {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, -] -azure-identity = [ - {file = "azure-identity-1.14.0.zip", hash = "sha256:72441799f8c5c89bfe21026965e266672a7c5d050c2c65119ef899dd5362e2b1"}, - {file = "azure_identity-1.14.0-py3-none-any.whl", hash = "sha256:edabf0e010eb85760e1dd19424d5e8f97ba2c9caff73a16e7b30ccbdbcce369b"}, -] -azure-storage-blob = [ - {file = "azure-storage-blob-12.17.0.zip", hash = "sha256:c14b785a17050b30fc326a315bdae6bc4a078855f4f94a4c303ad74a48dc8c63"}, - {file = "azure_storage_blob-12.17.0-py3-none-any.whl", hash = "sha256:0016e0c549a80282d7b4920c03f2f4ba35c53e6e3c7dbcd2a4a8c8eb3882c1e7"}, -] -babel = [ - {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, - {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, -] -backoff = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = 
"sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] -bandit = [ - {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, - {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, -] -beautifulsoup4 = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, -] -black = [ - {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, - {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, - {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, - {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, - {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, - {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, - {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, - {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, - {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, - {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, - {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, -] -blessed = [ - {file = "blessed-1.20.0-py2.py3-none-any.whl", hash = "sha256:0c542922586a265e699188e52d5f5ac5ec0dd517e5a1041d90d2bbf23f906058"}, - {file = "blessed-1.20.0.tar.gz", hash = "sha256:2cdd67f8746e048f00df47a2880f4d6acbcdb399031b604e34ba8f71d5787680"}, -] -blinker = [ - {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, - {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, -] -boto3 = [ - {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, - {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, -] -boto3-stubs = [ - {file = "boto3-stubs-1.28.40.tar.gz", hash = "sha256:76079a82f199087319762c931f13506e02129132e80257dab0888d3da7dc11c7"}, - {file = "boto3_stubs-1.28.40-py3-none-any.whl", hash = "sha256:bd1d1cbdcbf18902a090d4a746cdecef2a7ebe31cf9a474bbe407d57eaa79a6a"}, -] -botocore = [ - {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, - {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, -] -botocore-stubs = [ - {file = "botocore_stubs-1.31.40-py3-none-any.whl", hash = "sha256:aab534d7e7949cd543bc9b2fadc1a36712033cb00e6f31e2475eefe8486d19ae"}, - {file = "botocore_stubs-1.31.40.tar.gz", hash = "sha256:2001a253daf4ae2e171e6137b9982a00a7fbfc7a53449a16856dc049e7cd5214"}, -] -cachelib = [ - {file = "cachelib-0.9.0-py3-none-any.whl", hash = "sha256:811ceeb1209d2fe51cd2b62810bd1eccf70feba5c52641532498be5c675493b3"}, - {file = "cachelib-0.9.0.tar.gz", hash = "sha256:38222cc7c1b79a23606de5c2607f4925779e37cdcea1c2ad21b8bae94b5425a5"}, -] -cachetools = [ - {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, - {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, -] -cattrs = [ - {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, - {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, -] -certifi = [ - {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, - {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, -] -cffi = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = 
"sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = 
"sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = 
"cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, -] -chardet = [ - {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, - {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, -] -charset-normalizer = [ - {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, - {file = 
"charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, - {file = 
"charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, - {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, -] -click = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] -clickclick = [ - {file = "clickclick-20.10.2-py2.py3-none-any.whl", hash = "sha256:c8f33e6d9ec83f68416dd2136a7950125bd256ec39ccc9a85c6e280a16be2bb5"}, - {file = "clickclick-20.10.2.tar.gz", hash = "sha256:4efb13e62353e34c5eef7ed6582c4920b418d7dedc86d819e22ee089ba01802c"}, -] -colorama = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] -coloredlogs = [ - {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, - {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, -] -colorlog = [ - {file = "colorlog-4.8.0-py2.py3-none-any.whl", hash = "sha256:3dd15cb27e8119a24c1a7b5c93f9f3b455855e0f73993b1c25921b2f646f1dcd"}, - {file = "colorlog-4.8.0.tar.gz", hash = "sha256:59b53160c60902c405cdec28d38356e09d40686659048893e026ecbd589516b1"}, -] -configupdater = [ - {file = "ConfigUpdater-3.1.1-py2.py3-none-any.whl", hash = "sha256:805986dbeba317886c7a8d348b2e34986dc9e3128cd3761ecc35decbd372b286"}, - {file = "ConfigUpdater-3.1.1.tar.gz", hash = "sha256:46f0c74d73efa723776764b43c9739f68052495dd3d734319c1d0eb58511f15b"}, -] -connectorx = [ - {file = "connectorx-0.3.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:98274242c64a2831a8b1c86e0fa2c46a557dd8cbcf00c3adcf5a602455fb02d7"}, - {file = "connectorx-0.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2b11ba49efd330a7348bef3ce09c98218eea21d92a12dd75cd8f0ade5c99ffc"}, - {file = "connectorx-0.3.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3f6431a30304271f9137bd7854d2850231041f95164c6b749d9ede4c0d92d10c"}, - {file = "connectorx-0.3.2-cp310-none-win_amd64.whl", hash = "sha256:b370ebe8f44d2049254dd506f17c62322cc2db1b782a57f22cce01ddcdcc8fed"}, - {file = "connectorx-0.3.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:d5277fc936a80da3d1dcf889020e45da3493179070d9be8a47500c7001fab967"}, - {file = "connectorx-0.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8cc6c963237c3d3b02f7dcd47e1be9fc6e8b93ef0aeed8694f65c62b3c4688a1"}, - {file = "connectorx-0.3.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:9403902685b3423cba786db01a36f36efef90ae3d429e45b74dadb4ae9e328dc"}, - {file = "connectorx-0.3.2-cp311-none-win_amd64.whl", hash = 
"sha256:6b5f518194a2cf12d5ad031d488ded4e4678eff3b63551856f2a6f1a83197bb8"}, - {file = "connectorx-0.3.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:a5602ae0531e55c58af8cfca92b8e9454fc1ccd82c801cff8ee0f17c728b4988"}, - {file = "connectorx-0.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c5959bfb4a049bb8ce1f590b5824cd1105460b6552ffec336c4bd740eebd5bd"}, - {file = "connectorx-0.3.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c4387bb27ba3acde0ab6921fdafa3811e09fce0db3d1f1ede8547d9de3aab685"}, - {file = "connectorx-0.3.2-cp38-none-win_amd64.whl", hash = "sha256:4b1920c191be9a372629c31c92d5f71fc63f49f283e5adfc4111169de40427d9"}, - {file = "connectorx-0.3.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4473fc06ac3618c673cea63a7050e721fe536782d5c1b6e433589c37a63de704"}, - {file = "connectorx-0.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4009b16399457340326137a223921a24e3e166b45db4dbf3ef637b9981914dc2"}, - {file = "connectorx-0.3.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:74f5b93535663cf47f9fc3d7964f93e652c07003fa71c38d7a68f42167f54bba"}, - {file = "connectorx-0.3.2-cp39-none-win_amd64.whl", hash = "sha256:0b80acca13326856c14ee726b47699011ab1baa10897180240c8783423ca5e8c"}, -] -connexion = [ - {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"}, - {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"}, -] -cron-descriptor = [ - {file = "cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd"}, -] -croniter = [ - {file = "croniter-1.4.1-py2.py3-none-any.whl", hash = "sha256:9595da48af37ea06ec3a9f899738f1b2c1c13da3c38cea606ef7cd03ea421128"}, - {file = "croniter-1.4.1.tar.gz", hash = "sha256:1a6df60eacec3b7a0aa52a8f2ef251ae3dd2a7c7c8b9874e73e791636d55a361"}, -] -cryptography = [ - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c78451b78313fa81607fa1b3f1ae0a5ddd8014c38a02d9db0616133987b9cdf"}, - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:928258ba5d6f8ae644e764d0f996d61a8777559f72dfeb2eea7e2fe0ad6e782d"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a1b41bc97f1ad230a41657d9155113c7521953869ae57ac39ac7f1bb471469a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:841df4caa01008bad253bce2a6f7b47f86dc9f08df4b433c404def869f590a15"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5429ec739a29df2e29e15d082f1d9ad683701f0ec7709ca479b3ff2708dae65a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:43f2552a2378b44869fe8827aa19e69512e3245a219104438692385b0ee119d1"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:af03b32695b24d85a75d40e1ba39ffe7db7ffcb099fe507b39fd41a565f1b157"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:49f0805fc0b2ac8d4882dd52f4a3b935b210935d500b6b805f321addc8177406"}, - {file = "cryptography-41.0.7-cp37-abi3-win32.whl", hash = "sha256:f983596065a18a2183e7f79ab3fd4c475205b839e02cbc0efbbf9666c4b3083d"}, - {file = "cryptography-41.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:90452ba79b8788fa380dfb587cca692976ef4e757b194b093d845e8d99f612f2"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:079b85658ea2f59c4f43b70f8119a52414cdb7be34da5d019a77bf96d473b960"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b640981bf64a3e978a56167594a0e97db71c89a479da8e175d8bb5be5178c003"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e3114da6d7f95d2dee7d3f4eec16dacff819740bbab931aff8648cb13c5ff5e7"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d5ec85080cce7b0513cfd233914eb8b7bbd0633f1d1703aa28d1dd5a72f678ec"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a698cb1dac82c35fcf8fe3417a3aaba97de16a01ac914b89a0889d364d2f6be"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:37a138589b12069efb424220bf78eac59ca68b95696fc622b6ccc1c0a197204a"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:68a2dec79deebc5d26d617bfdf6e8aab065a4f34934b22d3b5010df3ba36612c"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:09616eeaef406f99046553b8a40fbf8b1e70795a91885ba4c96a70793de5504a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48a0476626da912a44cc078f9893f292f0b3e4c739caf289268168d8f4702a39"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c7f3201ec47d5207841402594f1d7950879ef890c0c495052fa62f58283fde1a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c5ca78485a255e03c32b513f8c2bc39fedb7f5c5f8535545bdc223a03b24f248"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6c391c021ab1f7a82da5d8d0b3cee2f4b2c455ec86c8aebbc84837a631ff309"}, - {file = "cryptography-41.0.7.tar.gz", hash = "sha256:13f93ce9bea8016c253b34afc6bd6a75993e5c40672ed5405a9c832f0d4a00bc"}, -] -databind-core = [ - {file = "databind.core-4.4.0-py3-none-any.whl", hash = "sha256:3c8a4d9abc93e158af9931d8cec389ddfc0514e02aec03b397948d243db11881"}, - {file = "databind.core-4.4.0.tar.gz", hash = "sha256:715d485e934c073f819f0250bbfcaf59c1319f83427365bc7cfd4c347f87576d"}, -] -databind-json = [ - {file = "databind.json-4.4.0-py3-none-any.whl", hash = "sha256:df8874118cfba6fd0e77ec3d41a87e04e26034bd545230cab0db1fe904bf1b09"}, - {file = "databind.json-4.4.0.tar.gz", hash = "sha256:4356afdf0aeefcc053eda0888650c59cc558be2686f08a58324d675ccd023586"}, -] -databricks-sdk = [ - {file = "databricks-sdk-0.17.0.tar.gz", hash = "sha256:0a1baa6783aba9b034b9a017da8d0cf839ec61ae8318792b78bfb3db0374dd9c"}, - {file = "databricks_sdk-0.17.0-py3-none-any.whl", hash = "sha256:ad90e01c7b1a9d60a3de6a35606c79ac982e8972d3ad3ff89c251c24439c8bb9"}, -] -databricks-sql-connector = [ - {file = "databricks_sql_connector-2.9.3-py3-none-any.whl", hash = "sha256:e37b5aa8bea22e84a9920e87ad9ba6cafbe656008c180a790baa53b711dd9889"}, - {file = "databricks_sql_connector-2.9.3.tar.gz", hash = "sha256:09a1686de3470091e78640de276053d4e18f8c03ba3627ed45b368f78bf87db9"}, -] -dbt-athena-community = [ - {file = "dbt-athena-community-1.7.1.tar.gz", hash = "sha256:02c7bc461628e2adbfaf9d3f51fbe9a5cb5e06ee2ea8329259758518ceafdc12"}, - {file = "dbt_athena_community-1.7.1-py3-none-any.whl", hash = "sha256:2a376fa128e2bd98cb774fcbf718ebe4fbc9cac7857aa037b9e36bec75448361"}, -] -dbt-bigquery = [ - {file = "dbt-bigquery-1.7.2.tar.gz", hash = "sha256:27c7f492f65ab5d1d43432a4467a436fc3637e3cb72c5b4ab07ddf7573c43596"}, - {file = 
"dbt_bigquery-1.7.2-py3-none-any.whl", hash = "sha256:75015755363d9e8b8cebe190d59a5e08375032b37bcfec41ec8753e7dea29f6e"}, -] -dbt-core = [ - {file = "dbt-core-1.7.4.tar.gz", hash = "sha256:769b95949210cb0d1eafdb7be48b01e59984650403f86510fdee65bd0f70f76d"}, - {file = "dbt_core-1.7.4-py3-none-any.whl", hash = "sha256:50050ae44fe9bad63e1b639810ed3629822cdc7a2af0eff6e08461c94c4527c0"}, -] -dbt-databricks = [ - {file = "dbt-databricks-1.7.3.tar.gz", hash = "sha256:045e26240c825342259a59004c2e35e7773b0b6cbb255e6896bd46d3810f9607"}, - {file = "dbt_databricks-1.7.3-py3-none-any.whl", hash = "sha256:7c2b7bd7228a401d8262781749fc496c825fe6050e661e5ab3f1c66343e311cc"}, -] -dbt-duckdb = [ - {file = "dbt-duckdb-1.7.1.tar.gz", hash = "sha256:e59b3e58d7a461988d000892b75ce95245cdf899c847e3a430eb2e9e10e63bb9"}, - {file = "dbt_duckdb-1.7.1-py3-none-any.whl", hash = "sha256:bd75b1a72924b942794d0c3293a1159a01f21ab9d82c9f18b22c253dedad101a"}, -] -dbt-extractor = [ - {file = "dbt_extractor-0.5.1-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:3b91e6106b967d908b34f83929d3f50ee2b498876a1be9c055fe060ed728c556"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3614ce9f83ae4cd0dc95f77730034a793a1c090a52dcf698ba1c94050afe3a8b"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ea4edf33035d0a060b1e01c42fb2d99316457d44c954d6ed4eed9f1948664d87"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3b9bf50eb062b4344d9546fe42038996c6e7e7daa10724aa955d64717260e5d"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c0ce901d4ebf0664977e4e1cbf596d4afc6c1339fcc7d2cf67ce3481566a626f"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:cbe338b76e9ffaa18275456e041af56c21bb517f6fbda7a58308138703da0996"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b25fa7a276ab26aa2d70ff6e0cf4cfb1490d7831fb57ee1337c24d2b0333b84"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5651e458be910ff567c0da3ea2eb084fd01884cc88888ac2cf1e240dcddacc2"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62e4f040fd338b652683421ce48e903812e27fd6e7af58b1b70a4e1f9f2c79e3"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91e25ad78f1f4feadd27587ebbcc46ad909cfad843118908f30336d08d8400ca"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cdf9938b36cd098bcdd80f43dc03864da3f69f57d903a9160a32236540d4ddcd"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:475e2c05b17eb4976eff6c8f7635be42bec33f15a74ceb87a40242c94a99cebf"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:100453ba06e169cbdb118234ab3f06f6722a2e0e316089b81c88dea701212abc"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-win32.whl", hash = "sha256:6916aae085fd5f2af069fd6947933e78b742c9e3d2165e1740c2e28ae543309a"}, - {file = "dbt_extractor-0.5.1-cp38-abi3-win_amd64.whl", hash = "sha256:eecc08f3743e802a8ede60c89f7b2bce872acc86120cbc0ae7df229bb8a95083"}, - {file = "dbt_extractor-0.5.1.tar.gz", hash = "sha256:cd5d95576a8dea4190240aaf9936a37fd74b4b7913ca69a3c368fc4472bb7e13"}, -] -dbt-postgres = [ - {file = 
"dbt-postgres-1.7.4.tar.gz", hash = "sha256:16185b8de36d1a2052a2e4b85512306ab55085b1ea323a353d0dc3628473208d"}, - {file = "dbt_postgres-1.7.4-py3-none-any.whl", hash = "sha256:d414b070ca5e48925ea9ab12706bbb9e2294f7d4509c28e7af42268596334044"}, -] -dbt-redshift = [ - {file = "dbt-redshift-1.7.1.tar.gz", hash = "sha256:6da69a83038d011570d131b85171842d0858a46bca3757419ae193b5724a2119"}, - {file = "dbt_redshift-1.7.1-py3-none-any.whl", hash = "sha256:2a48b9424934f5445e4285740ebe512afaa75882138121536ccc21d027ef62f2"}, -] -dbt-semantic-interfaces = [ - {file = "dbt_semantic_interfaces-0.4.3-py3-none-any.whl", hash = "sha256:af6ab8509da81ae5f5f1d5631c9761cccaed8cd5311d4824a8d4168ecd0f2093"}, - {file = "dbt_semantic_interfaces-0.4.3.tar.gz", hash = "sha256:9a46d07ad022a4c48783565a776ebc6f1d19e0412e70c4759bc9d7bba461ea1c"}, -] -dbt-snowflake = [ - {file = "dbt-snowflake-1.7.1.tar.gz", hash = "sha256:842a9e87b9e2d999e3bc27aaa369398a4d02bb3f8bb7447aa6151204d4eb90f0"}, - {file = "dbt_snowflake-1.7.1-py3-none-any.whl", hash = "sha256:32ef8733f67dcf4eb594d1b80852ef0b67e920f25bb8a2953031a3868a8d2b3e"}, -] -dbt-spark = [ - {file = "dbt-spark-1.7.1.tar.gz", hash = "sha256:a10e5d1bfdb2ca98e7ae2badd06150e2695d9d4fa18ae2354ed5bd093d77f947"}, - {file = "dbt_spark-1.7.1-py3-none-any.whl", hash = "sha256:99b5002edcdb82058a3b0ad33eb18b91a4bdde887d94855e8bd6f633d78837dc"}, -] -decopatch = [ - {file = "decopatch-1.4.10-py2.py3-none-any.whl", hash = "sha256:e151f7f93de2b1b3fd3f3272dcc7cefd1a69f68ec1c2d8e288ecd9deb36dc5f7"}, - {file = "decopatch-1.4.10.tar.gz", hash = "sha256:957f49c93f4150182c23f8fb51d13bb3213e0f17a79e09c8cca7057598b55720"}, -] -decorator = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] -deprecated = [ - {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, - {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, -] -diff-cover = [ - {file = "diff_cover-7.7.0-py3-none-any.whl", hash = "sha256:bf86f32ec999f9a9e79bf24969f7127ea7b4e55c3ef3cd9300feb13188c89736"}, - {file = "diff_cover-7.7.0.tar.gz", hash = "sha256:60614cf7e722cf7fb1bde497afac0b514294e1e26534449622dac4da296123fb"}, -] -dill = [ - {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, - {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, -] -dnspython = [ - {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, - {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, -] -docspec = [ - {file = "docspec-2.2.1-py3-none-any.whl", hash = "sha256:7538f750095a9688c6980ff9a4e029a823a500f64bd00b6b4bdb27951feb31cb"}, - {file = "docspec-2.2.1.tar.gz", hash = "sha256:4854e77edc0e2de40e785e57e95880f7095a05fe978f8b54cef7a269586e15ff"}, -] -docspec-python = [ - {file = "docspec_python-2.2.1-py3-none-any.whl", hash = "sha256:76ac41d35a8face35b2d766c2e8a416fb8832359785d396f0d53bcb00f178e54"}, - {file = "docspec_python-2.2.1.tar.gz", hash = "sha256:c41b850b4d6f4de30999ea6f82c9cdb9183d9bcba45559ee9173d3dab7281559"}, -] -docstring-parser = [ - {file = 
"docstring_parser-0.11.tar.gz", hash = "sha256:93b3f8f481c7d24e37c5d9f30293c89e2933fa209421c8abd731dd3ef0715ecb"}, -] -docutils = [ - {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, - {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, -] -domdf-python-tools = [ - {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, - {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, -] -duckdb = [ - {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6c724e105ecd78c8d86b3c03639b24e1df982392fc836705eb007e4b1b488864"}, - {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75f12c5a3086079fb6440122565f1762ef1a610a954f2d8081014c1dd0646e1a"}, - {file = "duckdb-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:151f5410c32f8f8fe03bf23462b9604349bc0b4bd3a51049bbf5e6a482a435e8"}, - {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1d066fdae22b9b711b1603541651a378017645f9fbc4adc9764b2f3c9e9e4a"}, - {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1de56d8b7bd7a7653428c1bd4b8948316df488626d27e9c388194f2e0d1428d4"}, - {file = "duckdb-0.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1fb6cd590b1bb4e31fde8efd25fedfbfa19a86fa72789fa5b31a71da0d95bce4"}, - {file = "duckdb-0.9.1-cp310-cp310-win32.whl", hash = "sha256:1039e073714d668cef9069bb02c2a6756c7969cedda0bff1332520c4462951c8"}, - {file = "duckdb-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:7e6ac4c28918e1d278a89ff26fd528882aa823868ed530df69d6c8a193ae4e41"}, - {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5eb750f2ee44397a61343f32ee9d9e8c8b5d053fa27ba4185d0e31507157f130"}, - {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aea2a46881d75dc069a242cb164642d7a4f792889010fb98210953ab7ff48849"}, - {file = "duckdb-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed3dcedfc7a9449b6d73f9a2715c730180056e0ba837123e7967be1cd3935081"}, - {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c55397bed0087ec4445b96f8d55f924680f6d40fbaa7f2e35468c54367214a5"}, - {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3261696130f1cfb955735647c93297b4a6241753fb0de26c05d96d50986c6347"}, - {file = "duckdb-0.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:64c04b1728e3e37cf93748829b5d1e028227deea75115bb5ead01c608ece44b1"}, - {file = "duckdb-0.9.1-cp311-cp311-win32.whl", hash = "sha256:12cf9fb441a32702e31534330a7b4d569083d46a91bf185e0c9415000a978789"}, - {file = "duckdb-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:fdfd85575ce9540e593d5d25c9d32050bd636c27786afd7b776aae0f6432b55e"}, - {file = "duckdb-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:704700a4b469e3bb1a7e85ac12e58037daaf2b555ef64a3fe2913ffef7bd585b"}, - {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf55b303b7b1a8c2165a96e609eb30484bc47481d94a5fb1e23123e728df0a74"}, - {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b70e23c14746904ca5de316436e43a685eb769c67fe3dbfaacbd3cce996c5045"}, - {file = 
"duckdb-0.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:77379f7f1f8b4dc98e01f8f6f8f15a0858cf456e2385e22507f3cb93348a88f9"}, - {file = "duckdb-0.9.1-cp37-cp37m-win32.whl", hash = "sha256:92c8f738489838666cae9ef41703f8b16f660bb146970d1eba8b2c06cb3afa39"}, - {file = "duckdb-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08c5484ac06ab714f745526d791141f547e2f5ac92f97a0a1b37dfbb3ea1bd13"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f66d3c07c7f6938d3277294677eb7dad75165e7c57c8dd505503fc5ef10f67ad"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c38044e5f78c0c7b58e9f937dcc6c34de17e9ca6be42f9f8f1a5a239f7a847a5"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73bc0d715b79566b3ede00c367235cfcce67be0eddda06e17665c7a233d6854a"}, - {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26622c3b4ea6a8328d95882059e3cc646cdc62d267d48d09e55988a3bba0165"}, - {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3367d10096ff2b7919cedddcf60d308d22d6e53e72ee2702f6e6ca03d361004a"}, - {file = "duckdb-0.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d88a119f1cb41911a22f08a6f084d061a8c864e28b9433435beb50a56b0d06bb"}, - {file = "duckdb-0.9.1-cp38-cp38-win32.whl", hash = "sha256:99567496e45b55c67427133dc916013e8eb20a811fc7079213f5f03b2a4f5fc0"}, - {file = "duckdb-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:5b3da4da73422a3235c3500b3fb541ac546adb3e35642ef1119dbcd9cc7f68b8"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eca00c0c2062c0265c6c0e78ca2f6a30611b28f3afef062036610e9fc9d4a67d"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb5af8e89d40fc4baab1515787ea1520a6c6cf6aa40ab9f107df6c3a75686ce1"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fae3d4f83ebcb47995f6acad7c6d57d003a9b6f0e1b31f79a3edd6feb377443"}, - {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b9a7efc745bc3c5d1018c3a2f58d9e6ce49c0446819a9600fdba5f78e54c47"}, - {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0b60167f5537772e9f5af940e69dcf50e66f5247732b8bb84a493a9af6055"}, - {file = "duckdb-0.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4f27f5e94c47df6c4ccddf18e3277b7464eea3db07356d2c4bf033b5c88359b8"}, - {file = "duckdb-0.9.1-cp39-cp39-win32.whl", hash = "sha256:d43cd7e6f783006b59dcc5e40fcf157d21ee3d0c8dfced35278091209e9974d7"}, - {file = "duckdb-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e666795887d9cf1d6b6f6cbb9d487270680e5ff6205ebc54b2308151f13b8cff"}, - {file = "duckdb-0.9.1.tar.gz", hash = "sha256:603a878746015a3f2363a65eb48bcbec816261b6ee8d71eee53061117f6eef9d"}, -] -email-validator = [ - {file = "email_validator-1.3.1-py2.py3-none-any.whl", hash = "sha256:49a72f5fa6ed26be1c964f0567d931d10bf3fdeeacdf97bc26ef1cd2a44e0bda"}, - {file = "email_validator-1.3.1.tar.gz", hash = "sha256:d178c5c6fa6c6824e9b04f199cf23e79ac15756786573c190d2ad13089411ad2"}, -] -enlighten = [ - {file = "enlighten-1.11.2-py2.py3-none-any.whl", hash = "sha256:98c9eb20e022b6a57f1c8d4f17e16760780b6881e6d658c40f52d21255ea45f3"}, - {file = "enlighten-1.11.2.tar.gz", hash = "sha256:9284861dee5a272e0e1a3758cd3f3b7180b1bd1754875da76876f2a7f46ccb61"}, -] -et-xmlfile = [ - {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = 
"sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, - {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, -] -exceptiongroup = [ - {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, - {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, -] -fastembed = [ - {file = "fastembed-0.1.1-py3-none-any.whl", hash = "sha256:131413ae52cd72f4c8cced7a675f8269dbfd1a852abade3c815e265114bcc05a"}, - {file = "fastembed-0.1.1.tar.gz", hash = "sha256:f7e524ee4f74bb8aad16be5b687d1f77f608d40e96e292c87881dc36baf8f4c7"}, -] -filelock = [ - {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, - {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, -] -flake8 = [ - {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, - {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, -] -flake8-bugbear = [ - {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, - {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, -] -flake8-builtins = [ - {file = "flake8-builtins-1.5.3.tar.gz", hash = "sha256:09998853b2405e98e61d2ff3027c47033adbdc17f9fe44ca58443d876eb00f3b"}, - {file = "flake8_builtins-1.5.3-py2.py3-none-any.whl", hash = "sha256:7706babee43879320376861897e5d1468e396a40b8918ed7bccf70e5f90b8687"}, -] -flake8-encodings = [ - {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, - {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, -] -flake8-helper = [ - {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, - {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, -] -flake8-tidy-imports = [ - {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, - {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, -] -flask = [ - {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"}, - {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"}, -] -flask-appbuilder = [ - {file = "Flask-AppBuilder-4.3.6.tar.gz", hash = "sha256:8ca9710fa7d2704747d195e11b487d45a571f40559d8399d9d5dfa42ea1f3c78"}, - {file = "Flask_AppBuilder-4.3.6-py3-none-any.whl", hash = "sha256:840480dfd43134bebf78f3c7dc909e324c2689d2d9f27aeb1880a8a25466bc8d"}, -] -flask-babel = [ - {file = "Flask-Babel-2.0.0.tar.gz", hash = "sha256:f9faf45cdb2e1a32ea2ec14403587d4295108f35017a7821a2b1acb8cfd9257d"}, - {file = "Flask_Babel-2.0.0-py3-none-any.whl", hash = "sha256:e6820a052a8d344e178cdd36dd4bb8aea09b4bda3d5f9fa9f008df2c7f2f5468"}, -] -flask-caching = [ - 
{file = "Flask-Caching-2.0.2.tar.gz", hash = "sha256:24b60c552d59a9605cc1b6a42c56cdb39a82a28dab4532bbedb9222ae54ecb4e"}, - {file = "Flask_Caching-2.0.2-py3-none-any.whl", hash = "sha256:19571f2570e9b8dd9dd9d2f49d7cbee69c14ebe8cc001100b1eb98c379dd80ad"}, -] -flask-jwt-extended = [ - {file = "Flask-JWT-Extended-4.5.2.tar.gz", hash = "sha256:ba56245ba43b71c8ae936784b867625dce8b9956faeedec2953222e57942fb0b"}, - {file = "Flask_JWT_Extended-4.5.2-py2.py3-none-any.whl", hash = "sha256:e0ef23d8c863746bd141046167073699e1a7b03c97169cbba70f05b8d9cd6b9e"}, -] -flask-limiter = [ - {file = "Flask-Limiter-3.5.0.tar.gz", hash = "sha256:13a3491b994c49f7cb4706587a38ca47e8162b576530472df38be68104f299c0"}, - {file = "Flask_Limiter-3.5.0-py3-none-any.whl", hash = "sha256:dbda4174f44e6cb858c6eb75e7488186f2977dd5d33d7028ba1aabf179de1bee"}, -] -flask-login = [ - {file = "Flask-Login-0.6.2.tar.gz", hash = "sha256:c0a7baa9fdc448cdd3dd6f0939df72eec5177b2f7abe6cb82fc934d29caac9c3"}, - {file = "Flask_Login-0.6.2-py3-none-any.whl", hash = "sha256:1ef79843f5eddd0f143c2cd994c1b05ac83c0401dc6234c143495af9a939613f"}, -] -flask-session = [ - {file = "Flask-Session-0.5.0.tar.gz", hash = "sha256:190875e6aebf2953c6803d42379ef3b934bc209ef8ef006f97aecb08f5aaeb86"}, - {file = "flask_session-0.5.0-py3-none-any.whl", hash = "sha256:1619bcbc16f04f64e90f8e0b17145ba5c9700090bb1294e889956c1282d58631"}, -] -flask-sqlalchemy = [ - {file = "Flask-SQLAlchemy-2.5.1.tar.gz", hash = "sha256:2bda44b43e7cacb15d4e05ff3cc1f8bc97936cc464623424102bfc2c35e95912"}, - {file = "Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl", hash = "sha256:f12c3d4cc5cc7fdcc148b9527ea05671718c3ea45d50c7e732cceb33f574b390"}, -] -flask-wtf = [ - {file = "Flask-WTF-1.1.1.tar.gz", hash = "sha256:41c4244e9ae626d63bed42ae4785b90667b885b1535d5a4095e1f63060d12aa9"}, - {file = "Flask_WTF-1.1.1-py3-none-any.whl", hash = "sha256:7887d6f1ebb3e17bf648647422f0944c9a469d0fcf63e3b66fb9a83037e38b2c"}, -] -flatbuffers = [ - {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, - {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, -] -frozenlist = [ - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, - {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, - {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, - {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, - {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, - {file = 
"frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, - {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, - {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, - {file = 
"frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, - {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, - {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, - {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, -] -fsspec = [ - {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, - {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, -] -future = [ - {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, -] -gcsfs = [ - {file = "gcsfs-2023.6.0-py2.py3-none-any.whl", hash = "sha256:3b3c7d8eddd4ec1380f3b49fbb861ee1e974adb223564401f10884b6260d406f"}, - {file = "gcsfs-2023.6.0.tar.gz", hash = "sha256:30b14fccadb3b7f0d99b2cd03bd8507c40f3a9a7d05847edca571f642bedbdff"}, -] -gitdb = [ - {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, - {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, -] -gitpython = [ - {file = "GitPython-3.1.34-py3-none-any.whl", hash = "sha256:5d3802b98a3bae1c2b8ae0e1ff2e4aa16bcdf02c145da34d092324f599f01395"}, - {file = "GitPython-3.1.34.tar.gz", hash = "sha256:85f7d365d1f6bf677ae51039c1ef67ca59091c7ebd5a3509aa399d4eda02d6dd"}, -] -giturlparse = [ - {file = "giturlparse-0.11.1-py2.py3-none-any.whl", hash = "sha256:6422f25c8ca563e1a3cb6b85862e48614be804cd1334e6d84be5630eb26b343f"}, - {file = "giturlparse-0.11.1.tar.gz", hash = "sha256:cdbe0c062096c69e00f08397826dddebc1f73bc15b793994579c13aafc70c990"}, -] -google-api-core = [ - {file = "google-api-core-2.11.1.tar.gz", hash = "sha256:25d29e05a0058ed5f19c61c0a78b1b53adea4d9364b464d014fbda941f6d1c9a"}, - {file = "google_api_core-2.11.1-py3-none-any.whl", hash = "sha256:d92a5a92dc36dd4f4b9ee4e55528a90e432b059f93aee6ad857f9de8cc7ae94a"}, -] -google-api-python-client = [ - {file = "google-api-python-client-2.97.0.tar.gz", hash = "sha256:48277291894876a1ca7ed4127e055e81f81e6343ced1b544a7200ae2c119dcd7"}, - {file = "google_api_python_client-2.97.0-py2.py3-none-any.whl", hash = "sha256:5215f4cd577753fc4192ccfbe0bb8b55d4bb5fd68fa6268ac5cf271b6305de31"}, -] -google-auth = [ - {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, - {file = 
"google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, -] -google-auth-httplib2 = [ - {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, - {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, -] -google-auth-oauthlib = [ - {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, - {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, -] -google-cloud-bigquery = [ - {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, - {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, -] -google-cloud-core = [ - {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, - {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, -] -google-cloud-dataproc = [ - {file = "google-cloud-dataproc-5.4.3.tar.gz", hash = "sha256:d9c77c52aa5ddf52ae657736dbfb5312402933f72bab8480fc2d2afe98697402"}, - {file = "google_cloud_dataproc-5.4.3-py2.py3-none-any.whl", hash = "sha256:9cfff56cb53621cdffd0a3d6b10701e886e0a8ad54891e6c223eb67c0ff753ad"}, -] -google-cloud-storage = [ - {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, - {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, -] -google-crc32c = [ - {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, - {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, - {file = 
"google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, - {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, - {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, - {file = 
"google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, -] -google-re2 = [ - {file = "google-re2-1.1.tar.gz", hash = "sha256:d3a9467ee52b46ac77ca928f6d0cbeaccfd92f03ca0f0f65b9df6a95184f3a1c"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:874d2e36dfa506b03d4f9c4aef1701a65304f4004c96c7edac7d8aea08fe193e"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b66eb84850afdce09aabca40bcd6f2a0e96178a1b4990d555678edb1f59bf255"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c461640a07db26dc2b51f43de607b7520e7debaf4f6a000f796a3c0196ca52af"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:7f9ba69eaee6e7a9f5ddfb919bf1a866af14a18b26a179e3fb1a6fe3d0cbf349"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f95cf16739cc3ea63728366881221b119f2322b4b739b7da6522d45a68792cea"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9fb56a41250191298e6a2859b0fdea1e83330c9870fe8d84e5836c506ae46e96"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb22ea995564d87baf4a4bfbb3ca024be913683a710f4f0dc9c94dc663afab20"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19b3f0bfbb2a2ca58ed0aaa9356d07a5c0921383a6dbeca086b2b74472f5ee08"}, - {file = 
"google_re2-1.1-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34fd7f97b84af7453cf05b25adfe2491ba3cef1ca548ac2907efa63d3510954d"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e029664192d8d30f7c977706183ef483e82ca239302272df74e01d2e22897ca"}, - {file = "google_re2-1.1-1-cp310-cp310-win32.whl", hash = "sha256:41a8f222f9839d059d37efd28e4deec203502d7e39c3759d83d6a33deadf1d2e"}, - {file = "google_re2-1.1-1-cp310-cp310-win_amd64.whl", hash = "sha256:6141d569fdf72aa693f040ba05c469036587395af07ff419b9a3c009d6ffefd3"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2d03f6aaf22788ba13a770f0d183b8eebe55545bcbb6e4c41dcccac7ded014d"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a98f15fd9c31bf80d368698447191a2e9703880b305dbf34d9a63ce634b8a557"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:42128916cc2966623832aabbd224c88e862d1c531d6bc49ab141f565e6321a90"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6e27986a166903ad7000635f6faed8ab5072d687f822ac9f692c40b2470aebcf"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:5e9edcd743a830d0c0b2729201e42ab86fceef8f4086df65563f482e4544359e"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:d33145bbfd32e916f1c911cd9225be5364a36c3959742a0cc4dfc0692d6a2a5e"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b27cc2544b69a357ab2a749dc0c13a1b9055198c56f4c2c3b0f61d693f8e203"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cdf8982b6def987e95b37984d0c1c878de32635dd78acde3273f730b69708c9"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71ac661a7365e134741fe5542f13d7ce1e6187446b96ddee4c8b7d153fc8f05a"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35a902ba31a71a3e9e114e44473624d9aa9f9b85ec981bfa91671aefe0ef1a6c"}, - {file = "google_re2-1.1-1-cp311-cp311-win32.whl", hash = "sha256:9469f26b485da2784c658e687a766c72e1a17b1e63b3ed24b5f64c3d19fbae3d"}, - {file = "google_re2-1.1-1-cp311-cp311-win_amd64.whl", hash = "sha256:07dd0780240ee431781119b46c3bbf76f5cef24a2cbb542f6a08c643e0a68d98"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9857dc4d69b8025057c8129e98406a24d51bdaf1b96e481dbba7e69e0ec85104"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:a6eaaa5f200022eb0bdded5949c91454fc96e1edd6f9e9a96dd1dc32c821c00e"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a32bb2afe128d90b8edc20d4f7d297f7e2753206eba92937a57e5280736eac74"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4f2754616c61b76ab4e5a4f39892a52a00897203b859c5abd7e3c630dd883cda"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:b110f3d657e8f67a43a699d327ce47095b80180ea1118e2de44cb5c7002503d9"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:fd62ba2853eef65e249a9c4437a9ecac568222062bc956f0c61a3d1151a6271b"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23b50eb74dc3e1d480b04b987c61242df5dade50d08bc16e25eb3582b83fca80"}, - {file = 
"google_re2-1.1-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1bde89855dd5ab0811187d21eec149975510c80e865c771c883524a452445e7"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10c6cddc720151a509beb98ab310fa0cc8bcb265f83518ebf831de2c9ff73af0"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bea09c5e8401ec50b8f211bc820ec2f0ca5e744ac67431a1b39bdacbd266553"}, - {file = "google_re2-1.1-1-cp38-cp38-win32.whl", hash = "sha256:ffa51b118037518bcdf63c7649d0b4be7071982b83f48ee3bbabf24a9cb48f8a"}, - {file = "google_re2-1.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:3b47715b6d43c9351957eb5092ad0fa625d04106d81f34cb8a726c53395ad474"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:998f31bf7efbc9bb603d0c356c1c77e5331f689c71783df8e21e67bb025fc66a"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0b5f0eaab859d3ba5f462c82bf37ab56e9d37e19b40b5898c731dbe4213a85f7"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f6d591d9c4cbc7142b729ddcc3f654d059d8ebc3bc95891198808a4785a6b4d8"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:3c325c2eae197b423330a04ab62e2e1cf942676cd5560907db4d63e23ce0648a"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:1e019e8f57955806ee843254ce454249b58800a6e872b2c8e9df2ef3459de0d5"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:58ebbcc7ad2b639768a6bca586357291660ea40dfac83039208e5055c357513b"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:723f8553e7fc022294071f14fb7dfc7958c365dc7d4a71d4938ccd2df8c6eca4"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d81512b08e6787fc8ef29fea365d3fdbf957553a625550e1d96c36877ae30355"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c58601b155651cc572a23ee2860788c77581aad85d3567a55b89b0674702f34d"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c6c9f64b9724ec38da8e514f404ac64e9a6a5e8b1d7031c2dadd05c1f4c16fd"}, - {file = "google_re2-1.1-1-cp39-cp39-win32.whl", hash = "sha256:d1b751b9ab9f8e2ab2a36d72b909281ce65f328c9115a1685acae1a2d1afd7a4"}, - {file = "google_re2-1.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:ac775c75cec7069351d201da4e0fb0cae4c1c5ebecd08fa34e1be89740c1d80b"}, -] -google-resumable-media = [ - {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, - {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, -] -googleapis-common-protos = [ - {file = "googleapis-common-protos-1.60.0.tar.gz", hash = "sha256:e73ebb404098db405ba95d1e1ae0aa91c3e15a71da031a2eeb6b2e23e7bc3708"}, - {file = "googleapis_common_protos-1.60.0-py2.py3-none-any.whl", hash = "sha256:69f9bbcc6acde92cab2db95ce30a70bd2b81d20b12eff3f1aabaffcbe8a93918"}, -] -grapheme = [ - {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"}, -] -graphviz = [ - {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, - {file = "graphviz-0.20.1.zip", hash = 
"sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, -] -greenlet = [ - {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, - {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, - {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, - {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, - {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, - {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75209eed723105f9596807495d58d10b3470fa6732dd6756595e89925ce2470"}, - {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a51c9751078733d88e013587b108f1b7a1fb106d402fb390740f002b6f6551a"}, - {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, - {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, - {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, - {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:eff4eb9b7eb3e4d0cae3d28c283dc16d9bed6b193c2e1ace3ed86ce48ea8df19"}, - {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5454276c07d27a740c5892f4907c86327b632127dd9abec42ee62e12427ff7e3"}, - {file = "greenlet-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:7cafd1208fdbe93b67c7086876f061f660cfddc44f404279c1585bbf3cdc64c5"}, - {file = "greenlet-2.0.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:910841381caba4f744a44bf81bfd573c94e10b3045ee00de0cbf436fe50673a6"}, - {file = "greenlet-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:18a7f18b82b52ee85322d7a7874e676f34ab319b9f8cce5de06067384aa8ff43"}, - {file = "greenlet-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:03a8f4f3430c3b3ff8d10a2a86028c660355ab637cee9333d63d66b56f09d52a"}, - {file = "greenlet-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4b58adb399c4d61d912c4c331984d60eb66565175cdf4a34792cd9600f21b394"}, 
- {file = "greenlet-2.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:703f18f3fda276b9a916f0934d2fb6d989bf0b4fb5a64825260eb9bfd52d78f0"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:32e5b64b148966d9cccc2c8d35a671409e45f195864560829f395a54226408d3"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd11f291565a81d71dab10b7033395b7a3a5456e637cf997a6f33ebdf06f8db"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0f72c9ddb8cd28532185f54cc1453f2c16fb417a08b53a855c4e6a418edd099"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd021c754b162c0fb55ad5d6b9d960db667faad0fa2ff25bb6e1301b0b6e6a75"}, - {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:3c9b12575734155d0c09d6c3e10dbd81665d5c18e1a7c6597df72fd05990c8cf"}, - {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b9ec052b06a0524f0e35bd8790686a1da006bd911dd1ef7d50b77bfbad74e292"}, - {file = "greenlet-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:dbfcfc0218093a19c252ca8eb9aee3d29cfdcb586df21049b9d777fd32c14fd9"}, - {file = "greenlet-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9f35ec95538f50292f6d8f2c9c9f8a3c6540bbfec21c9e5b4b751e0a7c20864f"}, - {file = "greenlet-2.0.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d5508f0b173e6aa47273bdc0a0b5ba055b59662ba7c7ee5119528f466585526b"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:f82d4d717d8ef19188687aa32b8363e96062911e63ba22a0cff7802a8e58e5f1"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c59a2120b55788e800d82dfa99b9e156ff8f2227f07c5e3012a45a399620b7"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2780572ec463d44c1d3ae850239508dbeb9fed38e294c68d19a24d925d9223ca"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937e9020b514ceedb9c830c55d5c9872abc90f4b5862f89c0887033ae33c6f73"}, - {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:36abbf031e1c0f79dd5d596bfaf8e921c41df2bdf54ee1eed921ce1f52999a86"}, - {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:18e98fb3de7dba1c0a852731c3070cf022d14f0d68b4c87a19cc1016f3bb8b33"}, - {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, - {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, - {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acd2162a36d3de67ee896c43effcd5ee3de247eb00354db411feb025aa319857"}, - {file = 
"greenlet-2.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0bf60faf0bc2468089bdc5edd10555bab6e85152191df713e2ab1fcc86382b5a"}, - {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, - {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, - {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, - {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be4ed120b52ae4d974aa40215fcdfde9194d63541c7ded40ee12eb4dda57b76b"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c817e84245513926588caf1152e3b559ff794d505555211ca041f032abbb6b"}, - {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1a819eef4b0e0b96bb0d98d797bef17dc1b4a10e8d7446be32d1da33e095dbb8"}, - {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7efde645ca1cc441d6dc4b48c0f7101e8d86b54c8530141b09fd31cef5149ec9"}, - {file = "greenlet-2.0.2-cp39-cp39-win32.whl", hash = "sha256:ea9872c80c132f4663822dd2a08d404073a5a9b5ba6155bea72fb2a79d1093b5"}, - {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, - {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, -] -grpc-google-iam-v1 = [ - {file = "grpc-google-iam-v1-0.12.6.tar.gz", hash = "sha256:2bc4b8fdf22115a65d751c9317329322602c39b7c86a289c9b72d228d960ef5f"}, - {file = "grpc_google_iam_v1-0.12.6-py2.py3-none-any.whl", hash = "sha256:5c10f3d8dc2d88678ab1a9b0cb5482735c5efee71e6c0cd59f872eef22913f5c"}, -] -grpcio = [ - {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, - {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, - {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, - {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, - {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, - {file = 
"grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, - {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, - {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, - {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, - {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, - {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, - {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, - {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, - {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, - {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, - {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, 
- {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, - {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, - {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, - {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, - {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, - {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, - {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, - {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, -] -grpcio-status = [ - {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"}, - {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"}, -] -grpcio-tools = [ - {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"}, - {file = 
"grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"}, - 
{file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"}, -] -gunicorn = [ - {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, - {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, -] -h11 = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] -h2 = [ - {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, - {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, -] -hexbytes = [ - {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, - {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, -] -hpack = [ - {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, - {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, -] -httpcore = [ - {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, - {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, -] -httplib2 = [ - {file = "httplib2-0.22.0-py3-none-any.whl", hash = 
"sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, - {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, -] -httpx = [ - {file = "httpx-0.24.1-py3-none-any.whl", hash = "sha256:06781eb9ac53cde990577af654bd990a4949de37a28bdb4a230d434f3a30b9bd"}, - {file = "httpx-0.24.1.tar.gz", hash = "sha256:5853a43053df830c20f8110c5e69fe44d035d850b2dfe795e196f00fdb774bdd"}, -] -humanfriendly = [ - {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, - {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, -] -humanize = [ - {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, - {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, -] -hyperframe = [ - {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, - {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, -] -idna = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] -importlib-metadata = [ - {file = "importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b"}, - {file = "importlib_metadata-6.11.0.tar.gz", hash = "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443"}, -] -importlib-resources = [ - {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, - {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, -] -inflection = [ - {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, - {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, -] -iniconfig = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] -isodate = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] -isort = [ - {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, - {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, -] -itsdangerous = [ - {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, - {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, -] -jaraco-classes = [ - {file = "jaraco.classes-3.3.0-py3-none-any.whl", hash = 
"sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb"}, - {file = "jaraco.classes-3.3.0.tar.gz", hash = "sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621"}, -] -jeepney = [ - {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, - {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, -] -jinja2 = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] -jinxed = [ - {file = "jinxed-1.2.0-py2.py3-none-any.whl", hash = "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b"}, - {file = "jinxed-1.2.0.tar.gz", hash = "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5"}, -] -jmespath = [ - {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, - {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, -] -jsonpath-ng = [ - {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, - {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, - {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, -] -jsonschema = [ - {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, - {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, -] -jsonschema-specifications = [ - {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = "sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, - {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, -] -keyring = [ - {file = "keyring-24.2.0-py3-none-any.whl", hash = "sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6"}, - {file = "keyring-24.2.0.tar.gz", hash = "sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509"}, -] -lazy-object-proxy = [ - {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, - {file = 
"lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = 
"sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, -] -leather = [ - {file = "leather-0.3.4-py2.py3-none-any.whl", hash = "sha256:5e741daee96e9f1e9e06081b8c8a10c4ac199301a0564cdd99b09df15b4603d2"}, - {file = "leather-0.3.4.tar.gz", hash = "sha256:b43e21c8fa46b2679de8449f4d953c06418666dc058ce41055ee8a8d3bb40918"}, -] -limits = [ - {file = "limits-3.6.0-py3-none-any.whl", hash = "sha256:32fe29a398352c71bc43d53773117d47e22c5ea4200aef28d3f5fdee10334cd7"}, - {file = "limits-3.6.0.tar.gz", hash = "sha256:57a9c69fd37ad1e4fa3886dff8d035227e1f6af87f47e9118627e72cf1ced3bf"}, -] -linkify-it-py = [ - {file = "linkify-it-py-2.0.2.tar.gz", hash = "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2"}, - {file = "linkify_it_py-2.0.2-py3-none-any.whl", hash = "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"}, -] -lockfile = [ - {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"}, - {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"}, -] -logbook = [ - {file = "Logbook-1.5.3-cp27-cp27m-win32.whl", hash = "sha256:56ee54c11df3377314cedcd6507638f015b4b88c0238c2e01b5eb44fd3a6ad1b"}, - {file = "Logbook-1.5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:2dc85f1510533fddb481e97677bb7bca913560862734c0b3b289bfed04f78c92"}, - {file = "Logbook-1.5.3-cp35-cp35m-win32.whl", hash = "sha256:94e2e11ff3c2304b0d09a36c6208e5ae756eb948b210e5cbd63cd8d27f911542"}, - {file = "Logbook-1.5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:97fee1bd9605f76335b169430ed65e15e457a844b2121bd1d90a08cf7e30aba0"}, - {file = "Logbook-1.5.3-cp36-cp36m-win32.whl", hash = "sha256:7c533eb728b3d220b1b5414ba4635292d149d79f74f6973b4aa744c850ca944a"}, - {file = "Logbook-1.5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:e18f7422214b1cf0240c56f884fd9c9b4ff9d0da2eabca9abccba56df7222f66"}, - {file = "Logbook-1.5.3-cp37-cp37m-win32.whl", hash = "sha256:8f76a2e7b1f72595f753228732f81ce342caf03babc3fed6bbdcf366f2f20f18"}, - {file = "Logbook-1.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0cf2cdbfb65a03b5987d19109dacad13417809dcf697f66e1a7084fb21744ea9"}, - {file = "Logbook-1.5.3.tar.gz", hash = 
"sha256:66f454ada0f56eae43066f604a222b09893f98c1adc18df169710761b8f32fe8"}, -] -lxml = [ - {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, - {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, - {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, - {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, - {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, - {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, - {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, - {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, - {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, - {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, - {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, - {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, - {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, - {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, - {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, - {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, - {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, - {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, - {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, - {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, - {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, - {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, - {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, - {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, - {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, - {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, - {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, - {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, - {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, - {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, - {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, - {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, - {file = 
"lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, - {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, - {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, - {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, - {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, - {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, - {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, - {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, - {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, - {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, - {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, - {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, - {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, - {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, - {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, - {file = 
"lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, -] -lz4 = [ - {file = "lz4-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201"}, - {file = "lz4-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f"}, - {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f76176492ff082657ada0d0f10c794b6da5800249ef1692b35cf49b1e93e8ef7"}, - {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d18718f9d78182c6b60f568c9a9cec8a7204d7cb6fad4e511a2ef279e4cb05"}, - {file = "lz4-4.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cdc60e21ec70266947a48839b437d46025076eb4b12c76bd47f8e5eb8a75dcc"}, - {file = "lz4-4.3.3-cp310-cp310-win32.whl", hash = "sha256:c81703b12475da73a5d66618856d04b1307e43428a7e59d98cfe5a5d608a74c6"}, - {file = "lz4-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:43cf03059c0f941b772c8aeb42a0813d68d7081c009542301637e5782f8a33e2"}, - {file = "lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6"}, - {file = "lz4-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61"}, - {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7"}, - {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563"}, - {file = "lz4-4.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21"}, - {file = "lz4-4.3.3-cp311-cp311-win32.whl", hash = "sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d"}, - {file = "lz4-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c"}, - {file = "lz4-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d"}, - {file = "lz4-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2"}, - {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809"}, - {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca8fccc15e3add173da91be8f34121578dc777711ffd98d399be35487c934bf"}, - {file = "lz4-4.3.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d84b479ddf39fe3ea05387f10b779155fc0990125f4fb35d636114e1c63a2e"}, - {file = "lz4-4.3.3-cp312-cp312-win32.whl", hash = "sha256:337cb94488a1b060ef1685187d6ad4ba8bc61d26d631d7ba909ee984ea736be1"}, - {file = "lz4-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:5d35533bf2cee56f38ced91f766cd0038b6abf46f438a80d50c52750088be93f"}, - {file = "lz4-4.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:363ab65bf31338eb364062a15f302fc0fab0a49426051429866d71c793c23394"}, - {file = "lz4-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:0a136e44a16fc98b1abc404fbabf7f1fada2bdab6a7e970974fb81cf55b636d0"}, - {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abc197e4aca8b63f5ae200af03eb95fb4b5055a8f990079b5bdf042f568469dd"}, - {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56f4fe9c6327adb97406f27a66420b22ce02d71a5c365c48d6b656b4aaeb7775"}, - {file = "lz4-4.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0e822cd7644995d9ba248cb4b67859701748a93e2ab7fc9bc18c599a52e4604"}, - {file = "lz4-4.3.3-cp38-cp38-win32.whl", hash = "sha256:24b3206de56b7a537eda3a8123c644a2b7bf111f0af53bc14bed90ce5562d1aa"}, - {file = "lz4-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:b47839b53956e2737229d70714f1d75f33e8ac26e52c267f0197b3189ca6de24"}, - {file = "lz4-4.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6756212507405f270b66b3ff7f564618de0606395c0fe10a7ae2ffcbbe0b1fba"}, - {file = "lz4-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee9ff50557a942d187ec85462bb0960207e7ec5b19b3b48949263993771c6205"}, - {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b901c7784caac9a1ded4555258207d9e9697e746cc8532129f150ffe1f6ba0d"}, - {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d9ec061b9eca86e4dcc003d93334b95d53909afd5a32c6e4f222157b50c071"}, - {file = "lz4-4.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4c7bf687303ca47d69f9f0133274958fd672efaa33fb5bcde467862d6c621f0"}, - {file = "lz4-4.3.3-cp39-cp39-win32.whl", hash = "sha256:054b4631a355606e99a42396f5db4d22046a3397ffc3269a348ec41eaebd69d2"}, - {file = "lz4-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:eac9af361e0d98335a02ff12fb56caeb7ea1196cf1a49dbf6f17828a131da807"}, - {file = "lz4-4.3.3.tar.gz", hash = "sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e"}, -] -makefun = [ - {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, - {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, -] -mako = [ - {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, - {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, -] -markdown = [ - {file = "Markdown-3.4.4-py3-none-any.whl", hash = "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941"}, - {file = "Markdown-3.4.4.tar.gz", hash = "sha256:225c6123522495d4119a90b3a3ba31a1e87a70369e03f14799ea9c0d7183a3d6"}, -] -markdown-it-py = [ - {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, - {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, -] -markupsafe = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = 
"sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, -] -marshmallow = [ - {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, - {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, -] -marshmallow-oneofschema = [ - {file = "marshmallow-oneofschema-3.0.1.tar.gz", hash = "sha256:62cd2099b29188c92493c2940ee79d1bf2f2619a71721664e5a98ec2faa58237"}, - {file = "marshmallow_oneofschema-3.0.1-py2.py3-none-any.whl", hash = "sha256:bd29410a9f2f7457a2b428286e2a80ef76b8ddc3701527dc1f935a88914b02f2"}, -] -marshmallow-sqlalchemy = [ - {file = "marshmallow-sqlalchemy-0.26.1.tar.gz", hash = "sha256:d8525f74de51554b5c8491effe036f60629a426229befa33ff614c8569a16a73"}, - {file = "marshmallow_sqlalchemy-0.26.1-py2.py3-none-any.whl", hash = "sha256:ba7493eeb8669a3bf00d8f906b657feaa87a740ae9e4ecf829cfd6ddf763d276"}, -] -mashumaro = [ - {file = "mashumaro-3.11-py3-none-any.whl", hash = "sha256:8f858bdb33790db6d9f3087dce793a26d109aeae38bed3ca9c2d7f16f19db412"}, - {file = "mashumaro-3.11.tar.gz", hash = "sha256:b0b2443be4bdad29bb209d91fe4a2a918fbd7b63cccfeb457c7eeb567db02f5e"}, -] -mccabe = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] -mdit-py-plugins = [ - {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, - {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, -] -mdurl = [ - {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, - {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, -] -minimal-snowplow-tracker = [ - {file = "minimal-snowplow-tracker-0.0.2.tar.gz", hash = "sha256:acabf7572db0e7f5cbf6983d495eef54081f71be392330eb3aadb9ccb39daaa4"}, -] -mmh3 = [ - {file = "mmh3-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b719ba87232749095011d567a36a25e40ed029fc61c47e74a12416d8bb60b311"}, - {file = "mmh3-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f0ad423711c5096cf4a346011f3b3ec763208e4f4cc4b10ed41cad2a03dbfaed"}, - {file = "mmh3-4.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80918e3f8ab6b717af0a388c14ffac5a89c15d827ff008c1ef545b8b32724116"}, - {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8222cd5f147defa1355b4042d590c34cef9b2bb173a159fcb72cda204061a4ac"}, - {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3821bcd1961ef19247c78c5d01b5a759de82ab0c023e2ff1d5ceed74322fa018"}, - {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59f7ed28c24249a54665f1ed3f6c7c1c56618473381080f79bcc0bd1d1db2e4a"}, - {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:dacd8d07d4b9be8f0cb6e8fd9a08fc237c18578cf8d42370ee8af2f5a2bf1967"}, - {file = "mmh3-4.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd00883ef6bcf7831026ce42e773a4b2a4f3a7bf9003a4e781fecb1144b06c1"}, - {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:df73d1c7f0c50c0f8061cd349968fd9dcc6a9e7592d1c834fa898f9c98f8dd7e"}, - {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f41eeae98f15af0a4ba2a92bce11d8505b612012af664a7634bbfdba7096f5fc"}, - {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ce9bb622e9f1162cafd033071b32ac495c5e8d5863fca2a5144c092a0f129a5b"}, - {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dd92e0ff9edee6af960d9862a3e519d651e6344321fd280fb082654fc96ecc4d"}, - {file = "mmh3-4.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aefa8ac8c8fc8ad93365477baef2125dbfd7235880a9c47dca2c46a0af49ef7"}, - {file = "mmh3-4.0.1-cp310-cp310-win32.whl", hash = "sha256:a076ea30ec279a63f44f4c203e4547b5710d00581165fed12583d2017139468d"}, - {file = "mmh3-4.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5aa1e87e448ee1ffa3737b72f2fe3f5960159ab75bbac2f49dca6fb9797132f6"}, - {file = "mmh3-4.0.1-cp310-cp310-win_arm64.whl", hash = "sha256:45155ff2f291c3a1503d1c93e539ab025a13fd8b3f2868650140702b8bd7bfc2"}, - {file = "mmh3-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:91f81d6dd4d0c3b4235b4a58a545493c946669c751a2e0f15084171dc2d81fee"}, - {file = "mmh3-4.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbfddaf55207798f5b29341e5b3a24dbff91711c51b1665eabc9d910255a78f0"}, - {file = "mmh3-4.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0deb8e19121c0896fdc709209aceda30a367cda47f4a884fcbe56223dbf9e867"}, - {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df468ac7b61ec7251d7499e27102899ca39d87686f659baf47f84323f8f4541f"}, - {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84936c113814c6ef3bc4bd3d54f538d7ba312d1d0c2441ac35fdd7d5221c60f6"}, - {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b1df3cf5ce5786aa093f45462118d87ff485f0d69699cdc34f6289b1e833632"}, - {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da281aa740aa9e7f9bebb879c1de0ea9366687ece5930f9f5027e7c87d018153"}, - {file = "mmh3-4.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ec380933a56eb9fea16d7fcd49f1b5a5c92d7d2b86f25e9a845b72758ee8c42"}, - {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2fa905fcec8a30e1c0ef522afae1d6170c4f08e6a88010a582f67c59209fb7c7"}, - {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9b23a06315a65ef0b78da0be32409cfce0d6d83e51d70dcebd3302a61e4d34ce"}, - {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:36c27089b12026db14be594d750f7ea6d5d785713b40a971b063f033f5354a74"}, - {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6338341ae6fa5eaa46f69ed9ac3e34e8eecad187b211a6e552e0d8128c568eb1"}, - {file = "mmh3-4.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1aece29e27d0c8fb489d00bb712fba18b4dd10e39c9aec2e216c779ae6400b8f"}, - {file = "mmh3-4.0.1-cp311-cp311-win32.whl", hash = 
"sha256:2733e2160c142eed359e25e5529915964a693f0d043165b53933f904a731c1b3"}, - {file = "mmh3-4.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:09f9f643e0b7f8d98473efdfcdb155105824a38a1ada374625b84c1208197a9b"}, - {file = "mmh3-4.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:d93422f38bc9c4d808c5438a011b769935a87df92ce277e9e22b6ec0ae8ed2e2"}, - {file = "mmh3-4.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:41013c033dc446d3bfb573621b8b53223adcfcf07be1da0bcbe166d930276882"}, - {file = "mmh3-4.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be46540eac024dd8d9b82899d35b2f23592d3d3850845aba6f10e6127d93246b"}, - {file = "mmh3-4.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0e64114b30c6c1e30f8201433b5fa6108a74a5d6f1a14af1b041360c0dd056aa"}, - {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:275637ecca755565e3b0505d3ecf8e1e0a51eb6a3cbe6e212ed40943f92f98cd"}, - {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:955178c8e8d3bc9ad18eab443af670cd13fe18a6b2dba16db2a2a0632be8a133"}, - {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:750afe0477e0c17904611045ad311ff10bc6c2ec5f5ddc5dd949a2b9bf71d5d5"}, - {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b7c18c35e9d6a59d6c5f94a6576f800ff2b500e41cd152ecfc7bb4330f32ba2"}, - {file = "mmh3-4.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b8635b1fc6b25d93458472c5d682a1a4b9e6c53e7f4ca75d2bf2a18fa9363ae"}, - {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:057b8de47adee8ad0f2e194ffa445b9845263c1c367ddb335e9ae19c011b25cc"}, - {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:78c0ee0197cfc912f57172aa16e784ad55b533e2e2e91b3a65188cc66fbb1b6e"}, - {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:d6acb15137467592691e41e6f897db1d2823ff3283111e316aa931ac0b5a5709"}, - {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:f91b2598e1f25e013da070ff641a29ebda76292d3a7bdd20ef1736e9baf0de67"}, - {file = "mmh3-4.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a78f6f2592395321e2f0dc6b618773398b2c9b15becb419364e0960df53e9f04"}, - {file = "mmh3-4.0.1-cp38-cp38-win32.whl", hash = "sha256:d8650982d0b70af24700bd32b15fab33bb3ef9be4af411100f4960a938b0dd0f"}, - {file = "mmh3-4.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:2489949c7261870a02eeaa2ec7b966881c1775df847c8ce6ea4de3e9d96b5f4f"}, - {file = "mmh3-4.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:dcd03a4bb0fa3db03648d26fb221768862f089b6aec5272f0df782a8b4fe5b5b"}, - {file = "mmh3-4.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3775fb0cc675977e5b506b12b8f23cd220be3d4c2d4db7df81f03c9f61baa4cc"}, - {file = "mmh3-4.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f250f78328d41cdf73d3ad9809359636f4fb7a846d7a6586e1a0f0d2f5f2590"}, - {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4161009c9077d5ebf8b472dbf0f41b9139b3d380e0bbe71bf9b503efb2965584"}, - {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cf986ebf530717fefeee8d0decbf3f359812caebba985e2c8885c0ce7c2ee4e"}, - {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b55741ed51e928b1eec94a119e003fa3bc0139f4f9802e19bea3af03f7dd55a"}, - {file = 
"mmh3-4.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8250375641b8c5ce5d56a00c6bb29f583516389b8bde0023181d5eba8aa4119"}, - {file = "mmh3-4.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29373e802bc094ffd490e39047bac372ac893c0f411dac3223ef11775e34acd0"}, - {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:071ba41e56f5c385d13ee84b288ccaf46b70cd9e9a6d8cbcbe0964dee68c0019"}, - {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:909e0b88d2c6285481fa6895c2a0faf6384e1b0093f72791aa57d1e04f4adc65"}, - {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:51d356f4380f9d9c2a0612156c3d1e7359933991e84a19304440aa04fd723e68"}, - {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c4b2549949efa63d8decb6572f7e75fad4f2375d52fafced674323239dd9812d"}, - {file = "mmh3-4.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9bcc7b32a89c4e5c6fdef97d82e8087ba26a20c25b4aaf0723abd0b302525934"}, - {file = "mmh3-4.0.1-cp39-cp39-win32.whl", hash = "sha256:8edee21ae4f4337fb970810ef5a263e5d2212b85daca0d39daf995e13380e908"}, - {file = "mmh3-4.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8cbb6f90f08952fcc90dbf08f0310fdf4d61096c5cb7db8adf03e23f3b857ae5"}, - {file = "mmh3-4.0.1-cp39-cp39-win_arm64.whl", hash = "sha256:ce71856cbca9d7c74d084eeee1bc5b126ed197c1c9530a4fdb994d099b9bc4db"}, - {file = "mmh3-4.0.1.tar.gz", hash = "sha256:ad8be695dc4e44a79631748ba5562d803f0ac42d36a6b97a53aca84a70809385"}, -] -more-itertools = [ - {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, - {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, -] -mpmath = [ - {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, - {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, -] -msal = [ - {file = "msal-1.23.0-py2.py3-none-any.whl", hash = "sha256:3342e0837a047007f9d479e814b559c3219767453d57920dc40a31986862048b"}, - {file = "msal-1.23.0.tar.gz", hash = "sha256:25c9a33acf84301f93d1fdbe9f1a9c60cd38af0d5fffdbfa378138fc7bc1e86b"}, -] -msal-extensions = [ - {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, - {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, -] -msgpack = [ +files = [ {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"}, {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198"}, {file = "msgpack-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81"}, @@ -7015,7 +4988,14 @@ msgpack = [ {file = "msgpack-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164"}, {file = "msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"}, ] -multidict = [ + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = 
false +python-versions = ">=3.7" +files = [ {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, @@ -7091,7 +5071,14 @@ multidict = [ {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] -mypy = [ + +[[package]] +name = "mypy" +version = "1.6.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "mypy-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5012e5cc2ac628177eaac0e83d622b2dd499e28253d4107a08ecc59ede3fc2c"}, {file = "mypy-1.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8fbb68711905f8912e5af474ca8b78d077447d8f3918997fecbf26943ff3cbb"}, {file = "mypy-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a1ad938fee7d2d96ca666c77b7c494c3c5bd88dff792220e1afbebb2925b5e"}, @@ -7120,47 +5107,161 @@ mypy = [ {file = "mypy-1.6.1-py3-none-any.whl", hash = "sha256:4cbe68ef919c28ea561165206a2dcb68591c50f3bcf777932323bc208d949cf1"}, {file = "mypy-1.6.1.tar.gz", hash = "sha256:4d01c00d09a0be62a4ca3f933e315455bde83f37f892ba4b08ce92f3cf44bcc1"}, ] -mypy-boto3-athena = [ + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +reports = ["lxml"] + +[[package]] +name = "mypy-boto3-athena" +version = "1.28.36" +description = "Type annotations for boto3.Athena 1.28.36 service generated with mypy-boto3-builder 7.18.0" +optional = true +python-versions = ">=3.7" +files = [ {file = "mypy-boto3-athena-1.28.36.tar.gz", hash = "sha256:a76df6aace3dc1d91b3f74640d617cd1b4802e5f348a22db2f16dfce0b01ee26"}, {file = "mypy_boto3_athena-1.28.36-py3-none-any.whl", hash = "sha256:b79b77df6ba30c55ff2f1f8b36de410f537c8c978d892e958b4c5e165797915a"}, ] -mypy-boto3-glue = [ + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[[package]] +name = "mypy-boto3-glue" +version = "1.28.36" +description = "Type annotations for boto3.Glue 1.28.36 service generated with mypy-boto3-builder 7.18.0" +optional = true +python-versions = ">=3.7" +files = [ {file = "mypy-boto3-glue-1.28.36.tar.gz", hash = "sha256:161771252bb6a220a0bfd8e6ad71da8548599c611f95fe8a94846f4a3386d2ae"}, {file = "mypy_boto3_glue-1.28.36-py3-none-any.whl", hash = "sha256:73bc14616ac65a5c02adea5efba7bbbcf8207cd0c0e3237c13d351ebc916338d"}, ] -mypy-boto3-lakeformation = [ + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[[package]] +name = "mypy-boto3-lakeformation" +version = "1.28.36" +description = "Type annotations for boto3.LakeFormation 1.28.36 service generated with mypy-boto3-builder 7.18.0" +optional = true +python-versions = ">=3.7" +files = [ {file = "mypy-boto3-lakeformation-1.28.36.tar.gz", hash = "sha256:9327cf0d28a09abf5bd90ae946ce7420b32a3b979a1a3554ac93716c3dceacb0"}, {file = 
"mypy_boto3_lakeformation-1.28.36-py3-none-any.whl", hash = "sha256:9525a8ab3d69632d4ec83eb565ff7fdfa1181fbdf032bcff4a20d4f8a0350688"}, ] -mypy-boto3-sts = [ + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[[package]] +name = "mypy-boto3-sts" +version = "1.28.37" +description = "Type annotations for boto3.STS 1.28.37 service generated with mypy-boto3-builder 7.18.2" +optional = true +python-versions = ">=3.7" +files = [ {file = "mypy-boto3-sts-1.28.37.tar.gz", hash = "sha256:54d64ca695ab90a51c68ac1e67ff9eae7ec69f926649e320a3b90ed1ec841a95"}, {file = "mypy_boto3_sts-1.28.37-py3-none-any.whl", hash = "sha256:24106ff30ecfe7ad0538657bbd00b6009418a5382b323cac46e0e26c1f5d50fb"}, ] -mypy-extensions = [ + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -natsort = [ + +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." +optional = false +python-versions = ">=3.7" +files = [ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, ] -networkx = [ + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + +[[package]] +name = "networkx" +version = "2.8.8" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.8" +files = [ {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"}, {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"}, ] -nr-date = [ + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=0.982)", "pre-commit (>=2.20)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.2)", "pydata-sphinx-theme (>=0.11)", "sphinx (>=5.2)", "sphinx-gallery (>=0.11)", "texext (>=0.6.6)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nr-date" +version = "2.1.0" +description = "" +optional = false +python-versions = ">=3.6,<4.0" +files = [ {file = "nr_date-2.1.0-py3-none-any.whl", hash = "sha256:bd672a9dfbdcf7c4b9289fea6750c42490eaee08036a72059dcc78cb236ed568"}, {file = "nr_date-2.1.0.tar.gz", hash = "sha256:0643aea13bcdc2a8bc56af9d5e6a89ef244c9744a1ef00cdc735902ba7f7d2e6"}, ] -nr-stream = [ + +[[package]] +name = "nr-stream" +version = "1.1.5" +description = "" +optional = false +python-versions = ">=3.6,<4.0" +files = [ {file = "nr_stream-1.1.5-py3-none-any.whl", hash = "sha256:47e12150b331ad2cb729cfd9d2abd281c9949809729ba461c6aa87dd9927b2d4"}, {file = "nr_stream-1.1.5.tar.gz", hash = "sha256:eb0216c6bfc61a46d4568dba3b588502c610ec8ddef4ac98f3932a2bd7264f65"}, ] 
-nr-util = [ + +[[package]] +name = "nr-util" +version = "0.8.12" +description = "General purpose Python utility library." +optional = false +python-versions = ">=3.7,<4.0" +files = [ {file = "nr.util-0.8.12-py3-none-any.whl", hash = "sha256:91da02ac9795eb8e015372275c1efe54bac9051231ee9b0e7e6f96b0b4e7d2bb"}, {file = "nr.util-0.8.12.tar.gz", hash = "sha256:a4549c2033d99d2f0379b3f3d233fd2a8ade286bbf0b3ad0cc7cea16022214f4"}, ] -numpy = [ + +[package.dependencies] +deprecated = ">=1.2.0,<2.0.0" +typing-extensions = ">=3.0.0" + +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, @@ -7189,6 +5290,15 @@ numpy = [ {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] + +[[package]] +name = "numpy" +version = "1.26.1" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = "<3.13,>=3.9" +files = [ {file = "numpy-1.26.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82e871307a6331b5f09efda3c22e03c095d957f04bf6bc1804f30048d0e5e7af"}, {file = "numpy-1.26.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdd9ec98f0063d93baeb01aad472a1a0840dee302842a2746a7a8e92968f9575"}, {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d78f269e0c4fd365fc2992c00353e4530d274ba68f15e968d8bc3c69ce5f5244"}, @@ -7222,11 +5332,30 @@ numpy = [ {file = "numpy-1.26.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6965888d65d2848e8768824ca8288db0a81263c1efccec881cb35a0d805fcd2f"}, {file = "numpy-1.26.1.tar.gz", hash = "sha256:c8c6c72d4a9f831f328efb1312642a1cafafaa88981d9ab76368d50d07d93cbe"}, ] -oauthlib = [ + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, ] -onnx = [ + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "onnx" +version = "1.15.0" +description = "Open Neural Network Exchange" +optional = true +python-versions = ">=3.8" +files = [ {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:51cacb6aafba308aaf462252ced562111f6991cdc7bc57a6c554c3519453a8ff"}, {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:0aee26b6f7f7da7e840de75ad9195a77a147d0662c94eaa6483be13ba468ffc1"}, {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baf6ef6c93b3b843edb97a8d5b3d229a1301984f3f8dee859c29634d2083e6f9"}, @@ -7253,7 +5382,21 @@ onnx = [ {file = "onnx-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d7a3e2d79d371e272e39ae3f7547e0b116d0c7f774a4004e97febe6c93507f"}, {file = "onnx-1.15.0.tar.gz", hash = "sha256:b18461a7d38f286618ca2a6e78062a2a9c634ce498e631e708a8041b00094825"}, ] -onnxruntime = [ + +[package.dependencies] +numpy = "*" +protobuf = ">=3.20.2" + +[package.extras] +reference = ["Pillow", "google-re2"] + +[[package]] +name = "onnxruntime" +version = "1.16.1" +description = "ONNX Runtime is a runtime accelerator for Machine Learning models" +optional = true +python-versions = "*" +files = [ {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fa93b166f2d97063dc9f33c5118c5729a4a5dd5617296b6dbef42f9047b3e81"}, @@ -7279,43 +5422,166 @@ onnxruntime = [ {file = "onnxruntime-1.16.1-cp39-cp39-win32.whl", hash = "sha256:85771adb75190db9364b25ddec353ebf07635b83eb94b64ed014f1f6d57a3857"}, {file = "onnxruntime-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:d32d2b30799c1f950123c60ae8390818381fd5f88bdf3627eeca10071c155dc5"}, ] -openpyxl = [ + +[package.dependencies] +coloredlogs = "*" +flatbuffers = "*" +numpy = ">=1.21.6" +packaging = "*" +protobuf = "*" +sympy = "*" + +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = true +python-versions = ">=3.6" +files = [ {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, ] -opentelemetry-api = [ + +[package.dependencies] +et-xmlfile = "*" + +[[package]] +name = "opentelemetry-api" +version = "1.15.0" +description = "OpenTelemetry Python API" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_api-1.15.0-py3-none-any.whl", hash = "sha256:e6c2d2e42140fd396e96edf75a7ceb11073f4efb4db87565a431cc9d0f93f2e0"}, {file = "opentelemetry_api-1.15.0.tar.gz", hash = "sha256:79ab791b4aaad27acc3dc3ba01596db5b5aac2ef75c70622c6038051d6c2cded"}, ] -opentelemetry-exporter-otlp = [ + +[package.dependencies] +deprecated = ">=1.2.6" +setuptools = ">=16.0" + +[[package]] +name = "opentelemetry-exporter-otlp" +version = "1.15.0" +description = "OpenTelemetry Collector Exporters" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_exporter_otlp-1.15.0-py3-none-any.whl", hash = "sha256:79f22748b6a54808a0448093dfa189c8490e729f67c134d4c992533d9393b33e"}, {file = "opentelemetry_exporter_otlp-1.15.0.tar.gz", hash = "sha256:4f7c49751d9720e2e726e13b0bb958ccade4e29122c305d92c033da432c8d2c5"}, ] -opentelemetry-exporter-otlp-proto-grpc = [ + +[package.dependencies] +opentelemetry-exporter-otlp-proto-grpc = "1.15.0" +opentelemetry-exporter-otlp-proto-http = "1.15.0" + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.15.0" +description = "OpenTelemetry 
Collector Protobuf over gRPC Exporter" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0-py3-none-any.whl", hash = "sha256:c2a5492ba7d140109968135d641d06ce3c5bd73c50665f787526065d57d7fd1d"}, {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0.tar.gz", hash = "sha256:844f2a4bb9bcda34e4eb6fe36765e5031aacb36dc60ed88c90fc246942ea26e7"}, ] -opentelemetry-exporter-otlp-proto-http = [ + +[package.dependencies] +backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} +googleapis-common-protos = ">=1.52,<2.0" +grpcio = ">=1.0.0,<2.0.0" +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-proto = "1.15.0" +opentelemetry-sdk = ">=1.12,<2.0" + +[package.extras] +test = ["pytest-grpc"] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.15.0" +description = "OpenTelemetry Collector Protobuf over HTTP Exporter" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_exporter_otlp_proto_http-1.15.0-py3-none-any.whl", hash = "sha256:3ec2a02196c8a54bf5cbf7fe623a5238625638e83b6047a983bdf96e2bbb74c0"}, {file = "opentelemetry_exporter_otlp_proto_http-1.15.0.tar.gz", hash = "sha256:11b2c814249a49b22f6cca7a06b05701f561d577b747f3660dfd67b6eb9daf9c"}, ] -opentelemetry-proto = [ + +[package.dependencies] +backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} +googleapis-common-protos = ">=1.52,<2.0" +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-proto = "1.15.0" +opentelemetry-sdk = ">=1.12,<2.0" +requests = ">=2.7,<3.0" + +[package.extras] +test = ["responses (==0.22.0)"] + +[[package]] +name = "opentelemetry-proto" +version = "1.15.0" +description = "OpenTelemetry Python Proto" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_proto-1.15.0-py3-none-any.whl", hash = "sha256:044b6d044b4d10530f250856f933442b8753a17f94ae37c207607f733fb9a844"}, {file = "opentelemetry_proto-1.15.0.tar.gz", hash = "sha256:9c4008e40ac8cab359daac283fbe7002c5c29c77ea2674ad5626a249e64e0101"}, ] -opentelemetry-sdk = [ + +[package.dependencies] +protobuf = ">=3.19,<5.0" + +[[package]] +name = "opentelemetry-sdk" +version = "1.15.0" +description = "OpenTelemetry Python SDK" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_sdk-1.15.0-py3-none-any.whl", hash = "sha256:555c533e9837766119bbccc7a80458c9971d853a6f1da683a2246cd5e53b4645"}, {file = "opentelemetry_sdk-1.15.0.tar.gz", hash = "sha256:98dbffcfeebcbff12c0c974292d6ea603180a145904cf838b1fe4d5c99078425"}, ] -opentelemetry-semantic-conventions = [ + +[package.dependencies] +opentelemetry-api = "1.15.0" +opentelemetry-semantic-conventions = "0.36b0" +setuptools = ">=16.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.36b0" +description = "OpenTelemetry Semantic Conventions" +optional = false +python-versions = ">=3.7" +files = [ {file = "opentelemetry_semantic_conventions-0.36b0-py3-none-any.whl", hash = "sha256:adc05635e87b9d3e007c9f530eed487fc3ef2177d02f82f674f28ebf9aff8243"}, {file = "opentelemetry_semantic_conventions-0.36b0.tar.gz", hash = "sha256:829dc221795467d98b773c04096e29be038d77526dc8d6ac76f546fb6279bf01"}, ] -ordered-set = [ + +[[package]] +name = "ordered-set" +version = "4.1.0" +description = "An OrderedSet is a custom MutableSet that remembers its order, so that every" +optional = false +python-versions = ">=3.7" +files = [ {file = "ordered-set-4.1.0.tar.gz", hash = 
"sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8"}, {file = "ordered_set-4.1.0-py3-none-any.whl", hash = "sha256:046e1132c71fcf3330438a539928932caf51ddbc582496833e23de611de14562"}, ] -orjson = [ + +[package.extras] +dev = ["black", "mypy", "pytest"] + +[[package]] +name = "orjson" +version = "3.9.5" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.7" +files = [ {file = "orjson-3.9.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ad6845912a71adcc65df7c8a7f2155eba2096cf03ad2c061c93857de70d699ad"}, {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e298e0aacfcc14ef4476c3f409e85475031de24e5b23605a465e9bf4b2156273"}, {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83c9939073281ef7dd7c5ca7f54cceccb840b440cec4b8a326bda507ff88a0a6"}, @@ -7377,113 +5643,443 @@ orjson = [ {file = "orjson-3.9.5-cp39-none-win_amd64.whl", hash = "sha256:91dda66755795ac6100e303e206b636568d42ac83c156547634256a2e68de694"}, {file = "orjson-3.9.5.tar.gz", hash = "sha256:6daf5ee0b3cf530b9978cdbf71024f1c16ed4a67d05f6ec435c6e7fe7a52724c"}, ] -packaging = [ + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] -pandas = [ - {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, - {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, - {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, - {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, - {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, - {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", 
hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, - {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, - {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, - {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, - {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, - {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, + +[[package]] +name = "pandas" +version = "2.0.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = 
"pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] + +[package.dependencies] +numpy = {version = ">=1.20.3", markers = "python_version < \"3.10\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs 
(>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] + +[[package]] +name = "pandas" +version = "2.2.0" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, + {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, + {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, + {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, + {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, + {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, + {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, + {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, + {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, + {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, + {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, + {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, + {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, + {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, + {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, + {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, + {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, + {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, ] -parsedatetime = [ +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] 
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "parsedatetime" +version = "2.4" +description = "Parse human-readable date/time text." +optional = false +python-versions = "*" +files = [ {file = "parsedatetime-2.4-py2-none-any.whl", hash = "sha256:9ee3529454bf35c40a77115f5a596771e59e1aee8c53306f346c461b8e913094"}, {file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"}, ] -pathspec = [ + +[package.dependencies] +future = "*" + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] -pathvalidate = [ + +[[package]] +name = "pathvalidate" +version = "3.1.0" +description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." +optional = false +python-versions = ">=3.7" +files = [ {file = "pathvalidate-3.1.0-py3-none-any.whl", hash = "sha256:912fd1d2e1a2a6a6f98da36a91f21ed86746473810ff625b9c34f3d06c0caa1d"}, {file = "pathvalidate-3.1.0.tar.gz", hash = "sha256:426970226e24199fd90d93995d223c1e28bda967cdf4370755a14cdf72a2a8ee"}, ] -pbr = [ + +[package.extras] +docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] +test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.2)", "pytest-md-report (>=0.3)"] + +[[package]] +name = "pbr" +version = "5.11.1" +description = "Python Build Reasonableness" +optional = false +python-versions = ">=2.6" +files = [ {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, ] -pendulum = [ - {file = "pendulum-2.1.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b6c352f4bd32dff1ea7066bd31ad0f71f8d8100b9ff709fb343f3b86cee43efe"}, - {file = "pendulum-2.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:318f72f62e8e23cd6660dbafe1e346950281a9aed144b5c596b2ddabc1d19739"}, - {file = "pendulum-2.1.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394"}, - {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:3481fad1dc3f6f6738bd575a951d3c15d4b4ce7c82dce37cf8ac1483fde6e8b0"}, - {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9702069c694306297ed362ce7e3c1ef8404ac8ede39f9b28b7c1a7ad8c3959e3"}, - {file = "pendulum-2.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:fb53ffa0085002ddd43b6ca61a7b34f2d4d7c3ed66f931fe599e1a531b42af9b"}, - {file = "pendulum-2.1.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = 
"sha256:c501749fdd3d6f9e726086bf0cd4437281ed47e7bca132ddb522f86a1645d360"}, - {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c807a578a532eeb226150d5006f156632df2cc8c5693d778324b43ff8c515dd0"}, - {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:2d1619a721df661e506eff8db8614016f0720ac171fe80dda1333ee44e684087"}, - {file = "pendulum-2.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f888f2d2909a414680a29ae74d0592758f2b9fcdee3549887779cd4055e975db"}, - {file = "pendulum-2.1.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e95d329384717c7bf627bf27e204bc3b15c8238fa8d9d9781d93712776c14002"}, - {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4c9c689747f39d0d02a9f94fcee737b34a5773803a64a5fdb046ee9cac7442c5"}, - {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1245cd0075a3c6d889f581f6325dd8404aca5884dea7223a5566c38aab94642b"}, - {file = "pendulum-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:db0a40d8bcd27b4fb46676e8eb3c732c67a5a5e6bfab8927028224fbced0b40b"}, - {file = "pendulum-2.1.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f5e236e7730cab1644e1b87aca3d2ff3e375a608542e90fe25685dae46310116"}, - {file = "pendulum-2.1.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de42ea3e2943171a9e95141f2eecf972480636e8e484ccffaf1e833929e9e052"}, - {file = "pendulum-2.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7c5ec650cb4bec4c63a89a0242cc8c3cebcec92fcfe937c417ba18277d8560be"}, - {file = "pendulum-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:33fb61601083f3eb1d15edeb45274f73c63b3c44a8524703dc143f4212bf3269"}, - {file = "pendulum-2.1.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:29c40a6f2942376185728c9a0347d7c0f07905638c83007e1d262781f1e6953a"}, - {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, - {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, -] -pipdeptree = [ + +[[package]] +name = "pendulum" +version = "3.0.0" +description = "Python datetimes made easy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, + {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, + {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c58227ac260d5b01fc1025176d7b31858c9f62595737f350d22124a9a3ad82d"}, + {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60fb6f415fea93a11c52578eaa10594568a6716602be8430b167eb0d730f3332"}, + {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b69f6b4dbcb86f2c2fe696ba991e67347bcf87fe601362a1aba6431454b46bde"}, + {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:138afa9c373ee450ede206db5a5e9004fd3011b3c6bbe1e57015395cd076a09f"}, + {file = "pendulum-3.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:83d9031f39c6da9677164241fd0d37fbfc9dc8ade7043b5d6d62f56e81af8ad2"}, + {file = "pendulum-3.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0c2308af4033fa534f089595bcd40a95a39988ce4059ccd3dc6acb9ef14ca44a"}, + {file = "pendulum-3.0.0-cp310-none-win_amd64.whl", hash = 
"sha256:9a59637cdb8462bdf2dbcb9d389518c0263799189d773ad5c11db6b13064fa79"}, + {file = "pendulum-3.0.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3725245c0352c95d6ca297193192020d1b0c0f83d5ee6bb09964edc2b5a2d508"}, + {file = "pendulum-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c035f03a3e565ed132927e2c1b691de0dbf4eb53b02a5a3c5a97e1a64e17bec"}, + {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597e66e63cbd68dd6d58ac46cb7a92363d2088d37ccde2dae4332ef23e95cd00"}, + {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99a0f8172e19f3f0c0e4ace0ad1595134d5243cf75985dc2233e8f9e8de263ca"}, + {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:77d8839e20f54706aed425bec82a83b4aec74db07f26acd039905d1237a5e1d4"}, + {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afde30e8146292b059020fbc8b6f8fd4a60ae7c5e6f0afef937bbb24880bdf01"}, + {file = "pendulum-3.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:660434a6fcf6303c4efd36713ca9212c753140107ee169a3fc6c49c4711c2a05"}, + {file = "pendulum-3.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dee9e5a48c6999dc1106eb7eea3e3a50e98a50651b72c08a87ee2154e544b33e"}, + {file = "pendulum-3.0.0-cp311-none-win_amd64.whl", hash = "sha256:d4cdecde90aec2d67cebe4042fd2a87a4441cc02152ed7ed8fb3ebb110b94ec4"}, + {file = "pendulum-3.0.0-cp311-none-win_arm64.whl", hash = "sha256:773c3bc4ddda2dda9f1b9d51fe06762f9200f3293d75c4660c19b2614b991d83"}, + {file = "pendulum-3.0.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:409e64e41418c49f973d43a28afe5df1df4f1dd87c41c7c90f1a63f61ae0f1f7"}, + {file = "pendulum-3.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a38ad2121c5ec7c4c190c7334e789c3b4624798859156b138fcc4d92295835dc"}, + {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde4d0b2024b9785f66b7f30ed59281bd60d63d9213cda0eb0910ead777f6d37"}, + {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2c5675769fb6d4c11238132962939b960fcb365436b6d623c5864287faa319"}, + {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8af95e03e066826f0f4c65811cbee1b3123d4a45a1c3a2b4fc23c4b0dff893b5"}, + {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2165a8f33cb15e06c67070b8afc87a62b85c5a273e3aaa6bc9d15c93a4920d6f"}, + {file = "pendulum-3.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ad5e65b874b5e56bd942546ea7ba9dd1d6a25121db1c517700f1c9de91b28518"}, + {file = "pendulum-3.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17fe4b2c844bbf5f0ece69cfd959fa02957c61317b2161763950d88fed8e13b9"}, + {file = "pendulum-3.0.0-cp312-none-win_amd64.whl", hash = "sha256:78f8f4e7efe5066aca24a7a57511b9c2119f5c2b5eb81c46ff9222ce11e0a7a5"}, + {file = "pendulum-3.0.0-cp312-none-win_arm64.whl", hash = "sha256:28f49d8d1e32aae9c284a90b6bb3873eee15ec6e1d9042edd611b22a94ac462f"}, + {file = "pendulum-3.0.0-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d4e2512f4e1a4670284a153b214db9719eb5d14ac55ada5b76cbdb8c5c00399d"}, + {file = "pendulum-3.0.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:3d897eb50883cc58d9b92f6405245f84b9286cd2de6e8694cb9ea5cb15195a32"}, + {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2e169cc2ca419517f397811bbe4589cf3cd13fca6dc38bb352ba15ea90739ebb"}, + {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f17c3084a4524ebefd9255513692f7e7360e23c8853dc6f10c64cc184e1217ab"}, + {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:826d6e258052715f64d05ae0fc9040c0151e6a87aae7c109ba9a0ed930ce4000"}, + {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2aae97087872ef152a0c40e06100b3665d8cb86b59bc8471ca7c26132fccd0f"}, + {file = "pendulum-3.0.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac65eeec2250d03106b5e81284ad47f0d417ca299a45e89ccc69e36130ca8bc7"}, + {file = "pendulum-3.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a5346d08f3f4a6e9e672187faa179c7bf9227897081d7121866358af369f44f9"}, + {file = "pendulum-3.0.0-cp37-none-win_amd64.whl", hash = "sha256:235d64e87946d8f95c796af34818c76e0f88c94d624c268693c85b723b698aa9"}, + {file = "pendulum-3.0.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:6a881d9c2a7f85bc9adafcfe671df5207f51f5715ae61f5d838b77a1356e8b7b"}, + {file = "pendulum-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d7762d2076b9b1cb718a6631ad6c16c23fc3fac76cbb8c454e81e80be98daa34"}, + {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e8e36a8130819d97a479a0e7bf379b66b3b1b520e5dc46bd7eb14634338df8c"}, + {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7dc843253ac373358ffc0711960e2dd5b94ab67530a3e204d85c6e8cb2c5fa10"}, + {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a78ad3635d609ceb1e97d6aedef6a6a6f93433ddb2312888e668365908c7120"}, + {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a137e9e0d1f751e60e67d11fc67781a572db76b2296f7b4d44554761049d6"}, + {file = "pendulum-3.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c95984037987f4a457bb760455d9ca80467be792236b69d0084f228a8ada0162"}, + {file = "pendulum-3.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d29c6e578fe0f893766c0d286adbf0b3c726a4e2341eba0917ec79c50274ec16"}, + {file = "pendulum-3.0.0-cp38-none-win_amd64.whl", hash = "sha256:deaba8e16dbfcb3d7a6b5fabdd5a38b7c982809567479987b9c89572df62e027"}, + {file = "pendulum-3.0.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b11aceea5b20b4b5382962b321dbc354af0defe35daa84e9ff3aae3c230df694"}, + {file = "pendulum-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a90d4d504e82ad236afac9adca4d6a19e4865f717034fc69bafb112c320dcc8f"}, + {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:825799c6b66e3734227756fa746cc34b3549c48693325b8b9f823cb7d21b19ac"}, + {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad769e98dc07972e24afe0cff8d365cb6f0ebc7e65620aa1976fcfbcadc4c6f3"}, + {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6fc26907eb5fb8cc6188cc620bc2075a6c534d981a2f045daa5f79dfe50d512"}, + {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c717eab1b6d898c00a3e0fa7781d615b5c5136bbd40abe82be100bb06df7a56"}, + {file = "pendulum-3.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3ddd1d66d1a714ce43acfe337190be055cdc221d911fc886d5a3aae28e14b76d"}, + {file = 
"pendulum-3.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:822172853d7a9cf6da95d7b66a16c7160cb99ae6df55d44373888181d7a06edc"}, + {file = "pendulum-3.0.0-cp39-none-win_amd64.whl", hash = "sha256:840de1b49cf1ec54c225a2a6f4f0784d50bd47f68e41dc005b7f67c7d5b5f3ae"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b1f74d1e6ffe5d01d6023870e2ce5c2191486928823196f8575dcc786e107b1"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:729e9f93756a2cdfa77d0fc82068346e9731c7e884097160603872686e570f07"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e586acc0b450cd21cbf0db6bae386237011b75260a3adceddc4be15334689a9a"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22e7944ffc1f0099a79ff468ee9630c73f8c7835cd76fdb57ef7320e6a409df4"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fa30af36bd8e50686846bdace37cf6707bdd044e5cb6e1109acbad3277232e04"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:440215347b11914ae707981b9a57ab9c7b6983ab0babde07063c6ee75c0dc6e7"}, + {file = "pendulum-3.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:314c4038dc5e6a52991570f50edb2f08c339debdf8cea68ac355b32c4174e820"}, + {file = "pendulum-3.0.0-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5acb1d386337415f74f4d1955c4ce8d0201978c162927d07df8eb0692b2d8533"}, + {file = "pendulum-3.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a789e12fbdefaffb7b8ac67f9d8f22ba17a3050ceaaa635cd1cc4645773a4b1e"}, + {file = "pendulum-3.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:860aa9b8a888e5913bd70d819306749e5eb488e6b99cd6c47beb701b22bdecf5"}, + {file = "pendulum-3.0.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5ebc65ea033ef0281368217fbf59f5cb05b338ac4dd23d60959c7afcd79a60a0"}, + {file = "pendulum-3.0.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d9fef18ab0386ef6a9ac7bad7e43ded42c83ff7ad412f950633854f90d59afa8"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1c134ba2f0571d0b68b83f6972e2307a55a5a849e7dac8505c715c531d2a8795"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:385680812e7e18af200bb9b4a49777418c32422d05ad5a8eb85144c4a285907b"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9eec91cd87c59fb32ec49eb722f375bd58f4be790cae11c1b70fac3ee4f00da0"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4386bffeca23c4b69ad50a36211f75b35a4deb6210bdca112ac3043deb7e494a"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dfbcf1661d7146d7698da4b86e7f04814221081e9fe154183e34f4c5f5fa3bf8"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:04a1094a5aa1daa34a6b57c865b25f691848c61583fb22722a4df5699f6bf74c"}, + {file = "pendulum-3.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5b0ec85b9045bd49dd3a3493a5e7ddfd31c36a2a60da387c419fa04abcaecb23"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0a15b90129765b705eb2039062a6daf4d22c4e28d1a54fa260892e8c3ae6e157"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:bb8f6d7acd67a67d6fedd361ad2958ff0539445ef51cbe8cd288db4306503cd0"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd69b15374bef7e4b4440612915315cc42e8575fcda2a3d7586a0d88192d0c88"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc00f8110db6898360c53c812872662e077eaf9c75515d53ecc65d886eec209a"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:83a44e8b40655d0ba565a5c3d1365d27e3e6778ae2a05b69124db9e471255c4a"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1a3604e9fbc06b788041b2a8b78f75c243021e0f512447806a6d37ee5214905d"}, + {file = "pendulum-3.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:92c307ae7accebd06cbae4729f0ba9fa724df5f7d91a0964b1b972a22baa482b"}, + {file = "pendulum-3.0.0.tar.gz", hash = "sha256:5d034998dea404ec31fae27af6b22cff1708f830a1ed7353be4d1019bb9f584e"}, +] + +[package.dependencies] +"backports.zoneinfo" = {version = ">=0.2.1", markers = "python_version < \"3.9\""} +importlib-resources = {version = ">=5.9.0", markers = "python_version < \"3.9\""} +python-dateutil = ">=2.6" +tzdata = ">=2020.1" + +[package.extras] +test = ["time-machine (>=2.6.0)"] + +[[package]] +name = "pipdeptree" +version = "2.9.6" +description = "Command line utility to show dependency tree of packages." +optional = true +python-versions = ">=3.7" +files = [ {file = "pipdeptree-2.9.6-py3-none-any.whl", hash = "sha256:de93f990d21224297c9f03e057da5a3dc65ff732a0147945dd9421671f13626b"}, {file = "pipdeptree-2.9.6.tar.gz", hash = "sha256:f815caf165e89c576ce659b866c7a82ae4590420c2d020a92d32e45097f8bc73"}, ] -pkgutil-resolve-name = [ + +[package.extras] +graphviz = ["graphviz (>=0.20.1)"] +test = ["covdefaults (>=2.3)", "diff-cover (>=7.6)", "pip (>=23.1.2)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "virtualenv (>=20.23.1,<21)"] + +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." +optional = false +python-versions = ">=3.6" +files = [ {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, ] -platformdirs = [ + +[[package]] +name = "platformdirs" +version = "3.8.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "platformdirs-3.8.1-py3-none-any.whl", hash = "sha256:cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c"}, {file = "platformdirs-3.8.1.tar.gz", hash = "sha256:f87ca4fcff7d2b0f81c6a748a77973d7af0f4d526f98f308477c3c436c74d528"}, ] -pluggy = [ + +[package.extras] +docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] -ply = [ + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] -portalocker = [ + +[[package]] +name = "portalocker" +version = "2.7.0" +description = "Wraps the portalocker recipe for easy usage" +optional = true +python-versions = ">=3.5" +files = [ {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"}, {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"}, ] -prefixed = [ + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"] + +[[package]] +name = "prefixed" +version = "0.7.0" +description = "Prefixed alternative numeric library" +optional = false +python-versions = "*" +files = [ {file = "prefixed-0.7.0-py2.py3-none-any.whl", hash = "sha256:537b0e4ff4516c4578f277a41d7104f769d6935ae9cdb0f88fed82ec7b3c0ca5"}, {file = "prefixed-0.7.0.tar.gz", hash = "sha256:0b54d15e602eb8af4ac31b1db21a37ea95ce5890e0741bb0dd9ded493cefbbe9"}, ] -prison = [ + +[[package]] +name = "prison" +version = "0.2.1" +description = "Rison encoder/decoder" +optional = false +python-versions = "*" +files = [ {file = "prison-0.2.1-py2.py3-none-any.whl", hash = "sha256:f90bab63fca497aa0819a852f64fb21a4e181ed9f6114deaa5dc04001a7555c5"}, {file = "prison-0.2.1.tar.gz", hash = "sha256:e6cd724044afcb1a8a69340cad2f1e3151a5839fd3a8027fd1357571e797c599"}, ] -proto-plus = [ + +[package.dependencies] +six = "*" + +[package.extras] +dev = ["nose", "pipreqs", "twine"] + +[[package]] +name = "proto-plus" +version = "1.22.3" +description = "Beautiful, Pythonic protocol buffers." 
+optional = true +python-versions = ">=3.6" +files = [ {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, ] -protobuf = [ + +[package.dependencies] +protobuf = ">=3.19.0,<5.0.0dev" + +[package.extras] +testing = ["google-api-core[grpc] (>=1.31.5)"] + +[[package]] +name = "protobuf" +version = "4.24.2" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "protobuf-4.24.2-cp310-abi3-win32.whl", hash = "sha256:58e12d2c1aa428ece2281cef09bbaa6938b083bcda606db3da4e02e991a0d924"}, {file = "protobuf-4.24.2-cp310-abi3-win_amd64.whl", hash = "sha256:77700b55ba41144fc64828e02afb41901b42497b8217b558e4a001f18a85f2e3"}, {file = "protobuf-4.24.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:237b9a50bd3b7307d0d834c1b0eb1a6cd47d3f4c2da840802cd03ea288ae8880"}, @@ -7498,7 +6094,14 @@ protobuf = [ {file = "protobuf-4.24.2-py3-none-any.whl", hash = "sha256:3b7b170d3491ceed33f723bbf2d5a260f8a4e23843799a3906f16ef736ef251e"}, {file = "protobuf-4.24.2.tar.gz", hash = "sha256:7fda70797ddec31ddfa3576cbdcc3ddbb6b3078b737a1a87ab9136af0570cd6e"}, ] -psutil = [ + +[[package]] +name = "psutil" +version = "5.9.5" +description = "Cross-platform lib for process and system monitoring in Python." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"}, {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"}, {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"}, @@ -7514,7 +6117,17 @@ psutil = [ {file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"}, {file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"}, ] -psycopg2-binary = [ + +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] + +[[package]] +name = "psycopg2-binary" +version = "2.9.7" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = true +python-versions = ">=3.6" +files = [ {file = "psycopg2-binary-2.9.7.tar.gz", hash = "sha256:1b918f64a51ffe19cd2e230b3240ba481330ce1d4b7875ae67305bd1d37b041c"}, {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ea5f8ee87f1eddc818fc04649d952c526db4426d26bab16efbe5a0c52b27d6ab"}, {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2993ccb2b7e80844d534e55e0f12534c2871952f78e0da33c35e648bf002bbff"}, @@ -7576,14 +6189,39 @@ psycopg2-binary = [ {file = "psycopg2_binary-2.9.7-cp39-cp39-win32.whl", hash = "sha256:18f12632ab516c47c1ac4841a78fddea6508a8284c7cf0f292cb1a523f2e2379"}, {file = "psycopg2_binary-2.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb3b8d55924a6058a26db69fb1d3e7e32695ff8b491835ba9f479537e14dcf9f"}, ] -psycopg2cffi = [ + +[[package]] +name = "psycopg2cffi" +version = "2.9.0" +description = ".. 
image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" +optional = true +python-versions = "*" +files = [ {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, ] -py = [ + +[package.dependencies] +cffi = ">=1.0" +six = "*" + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] -pyarrow = [ + +[[package]] +name = "pyarrow" +version = "14.0.1" +description = "Python library for Apache Arrow" +optional = true +python-versions = ">=3.8" +files = [ {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"}, {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"}, {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"}, @@ -7621,31 +6259,106 @@ pyarrow = [ {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"}, {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"}, ] -pyasn1 = [ + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pyasn1" +version = "0.5.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, ] -pyasn1-modules = [ + +[[package]] +name = "pyasn1-modules" +version = "0.3.0" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, ] -pyathena = [ + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.6.0" + +[[package]] +name = "pyathena" +version = "3.0.6" +description = "Python DB API 2.0 (PEP 249) client for Amazon Athena" +optional = true +python-versions = ">=3.8.1" +files = [ {file = "pyathena-3.0.6-py3-none-any.whl", hash = "sha256:27fb606a73644e62be8ef9b86cdf583ab3cb9f8cac9c2ad8f05b7ad6d4eaaa87"}, {file = "pyathena-3.0.6.tar.gz", hash = "sha256:ee6ea175134894209af2c6be1859b7be4371f7741faa7a58f9f97905ff6a73a4"}, ] -pycodestyle = [ + +[package.dependencies] +boto3 = ">=1.26.4" +botocore = ">=1.29.4" +fsspec = "*" +tenacity = ">=4.1.0" + +[package.extras] +arrow = ["pyarrow (>=7.0.0)"] +fastparquet = ["fastparquet (>=0.4.0)"] +pandas = ["pandas (>=1.3.0)"] +sqlalchemy = ["sqlalchemy (>=1.0.0)"] + +[[package]] +name = 
"pycodestyle" +version = "2.9.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.6" +files = [ {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, ] -pycparser = [ + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] -pydantic = [ + +[[package]] +name = "pydantic" +version = "2.5.0" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.7" +files = [ {file = "pydantic-2.5.0-py3-none-any.whl", hash = "sha256:7ce6e766c456ad026fe5712f7bcf036efc34bd5d107b3e669ef7ea01b3a9050c"}, {file = "pydantic-2.5.0.tar.gz", hash = "sha256:69bd6fb62d2d04b7055f59a396993486a2ee586c43a0b89231ce0000de07627c"}, ] -pydantic-core = [ + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.14.1" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.14.1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "pydantic_core-2.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:812beca1dcb2b722cccc7e9c620bd972cbc323321194ec2725eab3222e6ac573"}, {file = "pydantic_core-2.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2ccdc53cb88e51c7d47d74c59630d7be844428f6b8d463055ffad6f0392d8da"}, {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd937733bf2fe7d6a8bf208c12741f1f730b7bf5636033877767a75093c29b8a"}, @@ -7748,23 +6461,89 @@ pydantic-core = [ {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d983222223f63e323a5f497f5b85e211557a5d8fb670dc88f343784502b466ba"}, {file = "pydantic_core-2.14.1.tar.gz", hash = "sha256:0d82a6ee815388a362885186e431fac84c7a06623bc136f508e9f88261d8cadb"}, ] -pydoc-markdown = [ + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pydoc-markdown" +version = "4.8.2" +description = "Create Python API documentation in Markdown format." 
+optional = false +python-versions = ">=3.7,<4.0" +files = [ {file = "pydoc_markdown-4.8.2-py3-none-any.whl", hash = "sha256:203f74119e6bb2f9deba43d452422de7c8ec31955b61e0620fa4dd8c2611715f"}, {file = "pydoc_markdown-4.8.2.tar.gz", hash = "sha256:fb6c927e31386de17472d42f9bd3d3be2905977d026f6216881c65145aa67f0b"}, ] -pyflakes = [ + +[package.dependencies] +click = ">=7.1,<9.0" +"databind.core" = ">=4.4.0,<5.0.0" +"databind.json" = ">=4.4.0,<5.0.0" +docspec = ">=2.2.1,<3.0.0" +docspec-python = ">=2.2.1,<3.0.0" +docstring-parser = ">=0.11,<0.12" +jinja2 = ">=3.0.0,<4.0.0" +"nr.util" = ">=0.7.5,<1.0.0" +PyYAML = ">=5.0,<7.0" +requests = ">=2.23.0,<3.0.0" +tomli = ">=2.0.0,<3.0.0" +tomli_w = ">=1.0.0,<2.0.0" +watchdog = "*" +yapf = ">=0.30.0" + +[[package]] +name = "pyflakes" +version = "2.5.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.6" +files = [ {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, ] -pygments = [ + +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, ] -pyjwt = [ + +[package.extras] +plugins = ["importlib-metadata"] + +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, ] -pymongo = [ + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + +[[package]] +name = "pymongo" +version = "4.6.0" +description = "Python driver for MongoDB " +optional = false +python-versions = ">=3.7" +files = [ {file = "pymongo-4.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c011bd5ad03cc096f99ffcfdd18a1817354132c1331bed7a837a25226659845f"}, {file = "pymongo-4.6.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:5e63146dbdb1eac207464f6e0cfcdb640c9c5ff0f57b754fa96fe252314a1dc6"}, {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:2972dd1f1285866aba027eff2f4a2bbf8aa98563c2ced14cb34ee5602b36afdf"}, @@ -7795,6 +6574,7 @@ pymongo = [ {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab6bcc8e424e07c1d4ba6df96f7fb963bcb48f590b9456de9ebd03b88084fe8"}, {file = "pymongo-4.6.0-cp312-cp312-win32.whl", hash = "sha256:47aa128be2e66abd9d1a9b0437c62499d812d291f17b55185cb4aa33a5f710a4"}, {file = 
"pymongo-4.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:014e7049dd019a6663747ca7dae328943e14f7261f7c1381045dfc26a04fa330"}, + {file = "pymongo-4.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e24025625bad66895b1bc3ae1647f48f0a92dd014108fb1be404c77f0b69ca67"}, {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:288c21ab9531b037f7efa4e467b33176bc73a0c27223c141b822ab4a0e66ff2a"}, {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:747c84f4e690fbe6999c90ac97246c95d31460d890510e4a3fa61b7d2b87aa34"}, {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:055f5c266e2767a88bb585d01137d9c7f778b0195d3dbf4a487ef0638be9b651"}, @@ -7847,11 +6627,41 @@ pymongo = [ {file = "pymongo-4.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:107a234dc55affc5802acb3b6d83cbb8c87355b38a9457fcd8806bdeb8bce161"}, {file = "pymongo-4.6.0.tar.gz", hash = "sha256:fb1c56d891f9e34303c451998ef62ba52659648bb0d75b03c5e4ac223a3342c2"}, ] -pymysql = [ + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (<2.0.0)"] +encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +test = ["pytest (>=7)"] +zstd = ["zstandard"] + +[[package]] +name = "pymysql" +version = "1.1.0" +description = "Pure Python MySQL Driver" +optional = false +python-versions = ">=3.7" +files = [ {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, ] -pyodbc = [ + +[package.extras] +ed25519 = ["PyNaCl (>=1.4.0)"] +rsa = ["cryptography"] + +[[package]] +name = "pyodbc" +version = "4.0.39" +description = "DB API Module for ODBC" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ {file = "pyodbc-4.0.39-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74af348dbaee4885998858daf50c8964e767629ecf6c195868b016367b0bb861"}, {file = "pyodbc-4.0.39-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f5901b57eaef0761f4cf02bca8e7c63f589fd0fd723a79f6ccf1ea1275372e5"}, {file = "pyodbc-4.0.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0db69478d00fcd8d0b9bdde8aca0b0eada341fd6ed8c2da84b594b928c84106"}, @@ -7888,74 +6698,267 @@ pyodbc = [ {file = "pyodbc-4.0.39-cp39-cp39-win_amd64.whl", hash = "sha256:305c7d6337e2d4c8350677cc641b343fc0197b7b9bc167815c66b64545c67a53"}, {file = "pyodbc-4.0.39.tar.gz", hash = "sha256:e528bb70dd6d6299ee429868925df0866e3e919c772b9eff79c8e17920d8f116"}, ] -pyopenssl = [ + +[[package]] +name = "pyopenssl" +version = "23.2.0" +description = "Python wrapper module around the OpenSSL library" +optional = true +python-versions = ">=3.6" +files = [ {file = "pyOpenSSL-23.2.0-py3-none-any.whl", hash = "sha256:24f0dc5227396b3e831f4c7f602b950a5e9833d292c8e4a2e06b709292806ae2"}, {file = "pyOpenSSL-23.2.0.tar.gz", hash = "sha256:276f931f55a452e7dea69c7173e984eb2a4407ce413c918aa34b55f82f9b8bac"}, ] -pyparsing = [ + +[package.dependencies] +cryptography = ">=38.0.0,<40.0.0 || >40.0.0,<40.0.1 || >40.0.1,<42" + +[package.extras] +docs = ["sphinx (!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"] +test = ["flaky", "pretend", "pytest (>=3.0.1)"] + +[[package]] +name = 
"pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, ] -pypdf2 = [ + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pypdf2" +version = "3.0.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, ] -pyreadline3 = [ + +[package.dependencies] +typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + +[[package]] +name = "pyreadline3" +version = "3.4.1" +description = "A python implementation of GNU readline." +optional = true +python-versions = "*" +files = [ {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] -pytest = [ + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] -pytest-asyncio = [ + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.23.5" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +files = [ {file = "pytest-asyncio-0.23.5.tar.gz", hash = "sha256:3a048872a9c4ba14c3e90cc1aa20cbc2def7d01c7c8db3777ec281ba9c057675"}, {file = "pytest_asyncio-0.23.5-py3-none-any.whl", hash = "sha256:4e7093259ba018d58ede7d5315131d21923a60f8a6e9ee266ce1589685c89eac"}, ] -pytest-cases = [ + +[package.dependencies] +pytest = ">=7.0.0,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + +[[package]] +name = "pytest-cases" +version = "3.6.14" +description = "Separate test code from test cases in pytest." 
+optional = false +python-versions = "*" +files = [ {file = "pytest-cases-3.6.14.tar.gz", hash = "sha256:7455e6ca57a544c1bfdd8b56ace08c1c1ce4c6572a8aab8f1bd351dc25a10b6b"}, {file = "pytest_cases-3.6.14-py2.py3-none-any.whl", hash = "sha256:a087f3d019efd8942d0f0dc3fb526bedf9f83d742c40289e9623f6788aff7257"}, ] -pytest-console-scripts = [ + +[package.dependencies] +decopatch = "*" +makefun = ">=1.9.5" + +[[package]] +name = "pytest-console-scripts" +version = "1.4.1" +description = "Pytest plugin for testing console scripts" +optional = false +python-versions = ">=3.8" +files = [ {file = "pytest-console-scripts-1.4.1.tar.gz", hash = "sha256:5a826ed84cc0afa202eb9e44381d7d762f7bdda8e0c23f9f79a7f1f44cf4a895"}, {file = "pytest_console_scripts-1.4.1-py3-none-any.whl", hash = "sha256:ad860a951a90eca4bd3bd1159b8f5428633ba4ea01abd5c9526b67a95f65437a"}, ] -pytest-forked = [ + +[package.dependencies] +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +pytest = ">=4.0.0" + +[[package]] +name = "pytest-forked" +version = "1.6.0" +description = "run tests in isolated forked subprocesses" +optional = false +python-versions = ">=3.7" +files = [ {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"}, {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"}, ] -pytest-order = [ + +[package.dependencies] +py = "*" +pytest = ">=3.10" + +[[package]] +name = "pytest-order" +version = "1.1.0" +description = "pytest plugin to run your tests in a specific order" +optional = false +python-versions = ">=3.6" +files = [ {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"}, {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"}, ] -python-daemon = [ + +[package.dependencies] +pytest = [ + {version = ">=5.0", markers = "python_version < \"3.10\""}, + {version = ">=6.2.4", markers = "python_version >= \"3.10\""}, +] + +[[package]] +name = "python-daemon" +version = "3.0.1" +description = "Library to implement a well-behaved Unix daemon process." 
+optional = false +python-versions = ">=3" +files = [ {file = "python-daemon-3.0.1.tar.gz", hash = "sha256:6c57452372f7eaff40934a1c03ad1826bf5e793558e87fef49131e6464b4dae5"}, {file = "python_daemon-3.0.1-py3-none-any.whl", hash = "sha256:42bb848a3260a027fa71ad47ecd959e471327cb34da5965962edd5926229f341"}, ] -python-dateutil = [ + +[package.dependencies] +docutils = "*" +lockfile = ">=0.10" +setuptools = ">=62.4.0" + +[package.extras] +devel = ["coverage", "docutils", "isort", "testscenarios (>=0.4)", "testtools", "twine"] +test = ["coverage", "docutils", "testscenarios (>=0.4)", "testtools"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -python-nvd3 = [ + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-nvd3" +version = "0.15.0" +description = "Python NVD3 - Chart Library for d3.js" +optional = false +python-versions = "*" +files = [ {file = "python-nvd3-0.15.0.tar.gz", hash = "sha256:fbd75ff47e0ef255b4aa4f3a8b10dc8b4024aa5a9a7abed5b2406bd3cb817715"}, ] -python-slugify = [ + +[package.dependencies] +Jinja2 = ">=2.8" +python-slugify = ">=1.2.5" + +[[package]] +name = "python-slugify" +version = "8.0.1" +description = "A Python slugify application that also handles Unicode" +optional = false +python-versions = ">=3.7" +files = [ {file = "python-slugify-8.0.1.tar.gz", hash = "sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"}, {file = "python_slugify-8.0.1-py2.py3-none-any.whl", hash = "sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395"}, ] -pytimeparse = [ + +[package.dependencies] +text-unidecode = ">=1.3" + +[package.extras] +unidecode = ["Unidecode (>=1.1.1)"] + +[[package]] +name = "pytimeparse" +version = "1.1.8" +description = "Time expression parser" +optional = false +python-versions = "*" +files = [ {file = "pytimeparse-1.1.8-py2.py3-none-any.whl", hash = "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd"}, {file = "pytimeparse-1.1.8.tar.gz", hash = "sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a"}, ] -pytz = [ + +[[package]] +name = "pytz" +version = "2023.3" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] -pytzdata = [ - {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, - {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, -] -pywin32 = [ + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = true +python-versions = "*" +files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = 
"sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, @@ -7971,16 +6974,31 @@ pywin32 = [ {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, ] -pywin32-ctypes = [ + +[[package]] +name = "pywin32-ctypes" +version = "0.2.2" +description = "A (partial) reimplementation of pywin32 using ctypes/cffi" +optional = true +python-versions = ">=3.6" +files = [ {file = "pywin32-ctypes-0.2.2.tar.gz", hash = "sha256:3426e063bdd5fd4df74a14fa3cf80a0b42845a87e1d1e81f6549f9daec593a60"}, {file = "pywin32_ctypes-0.2.2-py3-none-any.whl", hash = "sha256:bf490a1a709baf35d688fe0ecf980ed4de11d2b3e37b51e5442587a75d9957e7"}, ] -pyyaml = [ + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -7988,8 +7006,16 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + 
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -8006,6 +7032,7 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -8013,144 +7040,320 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] -qdrant-client = [ + +[[package]] +name = "qdrant-client" +version = "1.6.4" 
+description = "Client library for the Qdrant vector search engine" +optional = true +python-versions = ">=3.8,<3.13" +files = [ {file = "qdrant_client-1.6.4-py3-none-any.whl", hash = "sha256:db4696978d6a62d78ff60f70b912383f1e467bda3053f732b01ddb5f93281b10"}, {file = "qdrant_client-1.6.4.tar.gz", hash = "sha256:bbd65f383b6a55a9ccf4e301250fa925179340dd90cfde9b93ce4230fd68867b"}, ] -redshift-connector = [ + +[package.dependencies] +fastembed = {version = "0.1.1", optional = true, markers = "python_version < \"3.12\" and extra == \"fastembed\""} +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.14.0", extras = ["http2"]} +numpy = [ + {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""}, + {version = ">=1.26", markers = "python_version >= \"3.12\""}, +] +portalocker = ">=2.7.0,<3.0.0" +pydantic = ">=1.10.8" +urllib3 = ">=1.26.14,<2.0.0" + +[package.extras] +fastembed = ["fastembed (==0.1.1)"] + +[[package]] +name = "redshift-connector" +version = "2.0.915" +description = "Redshift interface library" +optional = true +python-versions = ">=3.6" +files = [ {file = "redshift_connector-2.0.915-py3-none-any.whl", hash = "sha256:d02e8d6fa01dd46504c879953f6abd7fa72980edd1e6a80202448fe35fb4c9e4"}, ] -referencing = [ + +[package.dependencies] +beautifulsoup4 = ">=4.7.0,<5.0.0" +boto3 = ">=1.9.201,<2.0.0" +botocore = ">=1.12.201,<2.0.0" +lxml = ">=4.6.5" +packaging = "*" +pytz = ">=2020.1" +requests = ">=2.23.0,<3.0.0" +scramp = ">=1.2.0,<1.5.0" +setuptools = "*" + +[package.extras] +full = ["numpy", "pandas"] + +[[package]] +name = "referencing" +version = "0.30.2" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "referencing-0.30.2-py3-none-any.whl", hash = "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf"}, {file = "referencing-0.30.2.tar.gz", hash = "sha256:794ad8003c65938edcdbc027f1933215e0d0ccc0291e3ce20a4d87432b59efc0"}, ] -regex = [ - {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, - {file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c662a4cbdd6280ee56f841f14620787215a171c4e2d1744c9528bed8f5816c96"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf0633e4a1b667bfe0bb10b5e53fe0d5f34a6243ea2530eb342491f1adf4f739"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551ad543fa19e94943c5b2cebc54c73353ffff08228ee5f3376bd27b3d5b9800"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54de2619f5ea58474f2ac211ceea6b615af2d7e4306220d4f3fe690c91988a61"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ec4b3f0aebbbe2fc0134ee30a791af522a92ad9f164858805a77442d7d18570"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ae646c35cb9f820491760ac62c25b6d6b496757fda2d51be429e0e7b67ae0ab"}, - {file = 
"regex-2023.8.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca339088839582d01654e6f83a637a4b8194d0960477b9769d2ff2cfa0fa36d2"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d9b6627408021452dcd0d2cdf8da0534e19d93d070bfa8b6b4176f99711e7f90"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:bd3366aceedf274f765a3a4bc95d6cd97b130d1dda524d8f25225d14123c01db"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7aed90a72fc3654fba9bc4b7f851571dcc368120432ad68b226bd593f3f6c0b7"}, - {file = "regex-2023.8.8-cp310-cp310-win32.whl", hash = "sha256:80b80b889cb767cc47f31d2b2f3dec2db8126fbcd0cff31b3925b4dc6609dcdb"}, - {file = "regex-2023.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:b82edc98d107cbc7357da7a5a695901b47d6eb0420e587256ba3ad24b80b7d0b"}, - {file = "regex-2023.8.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1e7d84d64c84ad97bf06f3c8cb5e48941f135ace28f450d86af6b6512f1c9a71"}, - {file = "regex-2023.8.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce0f9fbe7d295f9922c0424a3637b88c6c472b75eafeaff6f910494a1fa719ef"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06c57e14ac723b04458df5956cfb7e2d9caa6e9d353c0b4c7d5d54fcb1325c46"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7a9aaa5a1267125eef22cef3b63484c3241aaec6f48949b366d26c7250e0357"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b7408511fca48a82a119d78a77c2f5eb1b22fe88b0d2450ed0756d194fe7a9a"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14dc6f2d88192a67d708341f3085df6a4f5a0c7b03dec08d763ca2cd86e9f559"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48c640b99213643d141550326f34f0502fedb1798adb3c9eb79650b1ecb2f177"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0085da0f6c6393428bf0d9c08d8b1874d805bb55e17cb1dfa5ddb7cfb11140bf"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:964b16dcc10c79a4a2be9f1273fcc2684a9eedb3906439720598029a797b46e6"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7ce606c14bb195b0e5108544b540e2c5faed6843367e4ab3deb5c6aa5e681208"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40f029d73b10fac448c73d6eb33d57b34607f40116e9f6e9f0d32e9229b147d7"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3b8e6ea6be6d64104d8e9afc34c151926f8182f84e7ac290a93925c0db004bfd"}, - {file = "regex-2023.8.8-cp311-cp311-win32.whl", hash = "sha256:942f8b1f3b223638b02df7df79140646c03938d488fbfb771824f3d05fc083a8"}, - {file = "regex-2023.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:51d8ea2a3a1a8fe4f67de21b8b93757005213e8ac3917567872f2865185fa7fb"}, - {file = "regex-2023.8.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e951d1a8e9963ea51efd7f150450803e3b95db5939f994ad3d5edac2b6f6e2b4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704f63b774218207b8ccc6c47fcef5340741e5d839d11d606f70af93ee78e4d4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22283c769a7b01c8ac355d5be0715bf6929b6267619505e289f792b01304d898"}, - {file = 
"regex-2023.8.8-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91129ff1bb0619bc1f4ad19485718cc623a2dc433dff95baadbf89405c7f6b57"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de35342190deb7b866ad6ba5cbcccb2d22c0487ee0cbb251efef0843d705f0d4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b993b6f524d1e274a5062488a43e3f9f8764ee9745ccd8e8193df743dbe5ee61"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3026cbcf11d79095a32d9a13bbc572a458727bd5b1ca332df4a79faecd45281c"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:293352710172239bf579c90a9864d0df57340b6fd21272345222fb6371bf82b3"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d909b5a3fff619dc7e48b6b1bedc2f30ec43033ba7af32f936c10839e81b9217"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3d370ff652323c5307d9c8e4c62efd1956fb08051b0e9210212bc51168b4ff56"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:b076da1ed19dc37788f6a934c60adf97bd02c7eea461b73730513921a85d4235"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e9941a4ada58f6218694f382e43fdd256e97615db9da135e77359da257a7168b"}, - {file = "regex-2023.8.8-cp36-cp36m-win32.whl", hash = "sha256:a8c65c17aed7e15a0c824cdc63a6b104dfc530f6fa8cb6ac51c437af52b481c7"}, - {file = "regex-2023.8.8-cp36-cp36m-win_amd64.whl", hash = "sha256:aadf28046e77a72f30dcc1ab185639e8de7f4104b8cb5c6dfa5d8ed860e57236"}, - {file = "regex-2023.8.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:423adfa872b4908843ac3e7a30f957f5d5282944b81ca0a3b8a7ccbbfaa06103"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ae594c66f4a7e1ea67232a0846649a7c94c188d6c071ac0210c3e86a5f92109"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e51c80c168074faa793685656c38eb7a06cbad7774c8cbc3ea05552d615393d8"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09b7f4c66aa9d1522b06e31a54f15581c37286237208df1345108fcf4e050c18"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e73e5243af12d9cd6a9d6a45a43570dbe2e5b1cdfc862f5ae2b031e44dd95a8"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:941460db8fe3bd613db52f05259c9336f5a47ccae7d7def44cc277184030a116"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f0ccf3e01afeb412a1a9993049cb160d0352dba635bbca7762b2dc722aa5742a"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e9216e0d2cdce7dbc9be48cb3eacb962740a09b011a116fd7af8c832ab116ca"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5cd9cd7170459b9223c5e592ac036e0704bee765706445c353d96f2890e816c8"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4873ef92e03a4309b3ccd8281454801b291b689f6ad45ef8c3658b6fa761d7ac"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:239c3c2a339d3b3ddd51c2daef10874410917cd2b998f043c13e2084cb191684"}, 
- {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1005c60ed7037be0d9dea1f9c53cc42f836188227366370867222bda4c3c6bd7"}, - {file = "regex-2023.8.8-cp37-cp37m-win32.whl", hash = "sha256:e6bd1e9b95bc5614a7a9c9c44fde9539cba1c823b43a9f7bc11266446dd568e3"}, - {file = "regex-2023.8.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9a96edd79661e93327cfeac4edec72a4046e14550a1d22aa0dd2e3ca52aec921"}, - {file = "regex-2023.8.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2181c20ef18747d5f4a7ea513e09ea03bdd50884a11ce46066bb90fe4213675"}, - {file = "regex-2023.8.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2ad5add903eb7cdde2b7c64aaca405f3957ab34f16594d2b78d53b8b1a6a7d6"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9233ac249b354c54146e392e8a451e465dd2d967fc773690811d3a8c240ac601"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920974009fb37b20d32afcdf0227a2e707eb83fe418713f7a8b7de038b870d0b"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2b6c5dfe0929b6c23dde9624483380b170b6e34ed79054ad131b20203a1a63"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96979d753b1dc3b2169003e1854dc67bfc86edf93c01e84757927f810b8c3c93"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ae54a338191e1356253e7883d9d19f8679b6143703086245fb14d1f20196be9"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2162ae2eb8b079622176a81b65d486ba50b888271302190870b8cc488587d280"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c884d1a59e69e03b93cf0dfee8794c63d7de0ee8f7ffb76e5f75be8131b6400a"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf9273e96f3ee2ac89ffcb17627a78f78e7516b08f94dc435844ae72576a276e"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:83215147121e15d5f3a45d99abeed9cf1fe16869d5c233b08c56cdf75f43a504"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f7454aa427b8ab9101f3787eb178057c5250478e39b99540cfc2b889c7d0586"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0640913d2c1044d97e30d7c41728195fc37e54d190c5385eacb52115127b882"}, - {file = "regex-2023.8.8-cp38-cp38-win32.whl", hash = "sha256:0c59122ceccb905a941fb23b087b8eafc5290bf983ebcb14d2301febcbe199c7"}, - {file = "regex-2023.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:c12f6f67495ea05c3d542d119d270007090bad5b843f642d418eb601ec0fa7be"}, - {file = "regex-2023.8.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82cd0a69cd28f6cc3789cc6adeb1027f79526b1ab50b1f6062bbc3a0ccb2dbc3"}, - {file = "regex-2023.8.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb34d1605f96a245fc39790a117ac1bac8de84ab7691637b26ab2c5efb8f228c"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:987b9ac04d0b38ef4f89fbc035e84a7efad9cdd5f1e29024f9289182c8d99e09"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dd6082f4e2aec9b6a0927202c85bc1b09dcab113f97265127c1dc20e2e32495"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:7eb95fe8222932c10d4436e7a6f7c99991e3fdd9f36c949eff16a69246dee2dc"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7098c524ba9f20717a56a8d551d2ed491ea89cbf37e540759ed3b776a4f8d6eb"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b694430b3f00eb02c594ff5a16db30e054c1b9589a043fe9174584c6efa8033"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2aeab3895d778155054abea5238d0eb9a72e9242bd4b43f42fd911ef9a13470"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:988631b9d78b546e284478c2ec15c8a85960e262e247b35ca5eaf7ee22f6050a"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:67ecd894e56a0c6108ec5ab1d8fa8418ec0cff45844a855966b875d1039a2e34"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:14898830f0a0eb67cae2bbbc787c1a7d6e34ecc06fbd39d3af5fe29a4468e2c9"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f2200e00b62568cfd920127782c61bc1c546062a879cdc741cfcc6976668dfcf"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9691a549c19c22d26a4f3b948071e93517bdf86e41b81d8c6ac8a964bb71e5a6"}, - {file = "regex-2023.8.8-cp39-cp39-win32.whl", hash = "sha256:6ab2ed84bf0137927846b37e882745a827458689eb969028af8032b1b3dac78e"}, - {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, - {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, -] -requests = [ + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" + +[[package]] +name = "regex" +version = "2023.12.25" +description = "Alternative regular expression module, to replace re." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, + {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, + {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"}, + {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"}, + {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"}, + {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"}, + {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"}, + {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"}, + {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"}, + {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"}, + {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"}, + {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"}, + {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"}, + {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"}, + {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"}, + {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"}, + {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"}, + {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"}, + {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"}, + {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"}, + {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"}, + {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"}, + {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"}, + {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"}, + {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"}, + {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"}, + {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"}, + {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"}, + {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"}, + {file = 
"regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"}, + {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"}, + {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"}, + {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"}, + {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] -requests-mock = [ + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-mock" +version = "1.11.0" +description = "Mock out responses from the requests package" +optional = false +python-versions = "*" +files = [ {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, ] -requests-oauthlib = [ + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, ] -requests-toolbelt = [ + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, ] -requirements-parser = [ + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "requirements-parser" +version = "0.5.0" +description = "This is a small Python module for parsing Pip requirement files." 
+optional = false +python-versions = ">=3.6,<4.0" +files = [ {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"}, {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"}, ] -rfc3339-validator = [ + +[package.dependencies] +types-setuptools = ">=57.0.0" + +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +description = "A pure python RFC3339 validator" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, ] -rich = [ + +[package.dependencies] +six = "*" + +[[package]] +name = "rich" +version = "13.5.2" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, ] -rich-argparse = [ + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rich-argparse" +version = "1.3.0" +description = "Rich help formatters for argparse and optparse" +optional = false +python-versions = ">=3.7" +files = [ {file = "rich_argparse-1.3.0-py3-none-any.whl", hash = "sha256:1a5eda1659c0a215862fe3630fcbe68d7792f18a8106baaf4e005b9896acc6f6"}, {file = "rich_argparse-1.3.0.tar.gz", hash = "sha256:974cc1ba0aaa0d6aabc09ab1b78f9ba928670e08590f9551121bcbc60c75b74a"}, ] -rpds-py = [ + +[package.dependencies] +rich = ">=11.0.0" + +[[package]] +name = "rpds-py" +version = "0.10.0" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.8" +files = [ {file = "rpds_py-0.10.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:c1e0e9916301e3b3d970814b1439ca59487f0616d30f36a44cead66ee1748c31"}, {file = "rpds_py-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ce8caa29ebbdcde67e5fd652c811d34bc01f249dbc0d61e5cc4db05ae79a83b"}, {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad277f74b1c164f7248afa968700e410651eb858d7c160d109fb451dc45a2f09"}, @@ -8249,35 +7452,148 @@ rpds-py = [ {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:872f3dcaa8bf2245944861d7311179d2c0c9b2aaa7d3b464d99a7c2e401f01fa"}, {file = "rpds_py-0.10.0.tar.gz", hash = "sha256:e36d7369363d2707d5f68950a64c4e025991eb0177db01ccb6aa6facae48b69f"}, ] -rsa = [ + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, ] -s3fs = [ + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = 
"s3fs" +version = "2023.6.0" +description = "Convenient Filesystem interface over S3" +optional = true +python-versions = ">= 3.8" +files = [ {file = "s3fs-2023.6.0-py3-none-any.whl", hash = "sha256:d1a0a423d0d2e17fb2a193d9531935dc3f45ba742693448a461b6b34f6a92a24"}, {file = "s3fs-2023.6.0.tar.gz", hash = "sha256:63fd8ddf05eb722de784b7b503196107f2a518061298cf005a8a4715b4d49117"}, ] -s3transfer = [ + +[package.dependencies] +aiobotocore = ">=2.5.0,<2.6.0" +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +fsspec = "2023.6.0" + +[package.extras] +awscli = ["aiobotocore[awscli] (>=2.5.0,<2.6.0)"] +boto3 = ["aiobotocore[boto3] (>=2.5.0,<2.6.0)"] + +[[package]] +name = "s3transfer" +version = "0.6.2" +description = "An Amazon S3 Transfer Manager" +optional = true +python-versions = ">= 3.7" +files = [ {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, ] -scramp = [ + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "scramp" +version = "1.4.4" +description = "An implementation of the SCRAM protocol." +optional = true +python-versions = ">=3.7" +files = [ {file = "scramp-1.4.4-py3-none-any.whl", hash = "sha256:b142312df7c2977241d951318b7ee923d6b7a4f75ba0f05b621ece1ed616faa3"}, {file = "scramp-1.4.4.tar.gz", hash = "sha256:b7022a140040f33cf863ab2657917ed05287a807b917950489b89b9f685d59bc"}, ] -secretstorage = [ + +[package.dependencies] +asn1crypto = ">=1.5.1" + +[[package]] +name = "secretstorage" +version = "3.3.3" +description = "Python bindings to FreeDesktop.org Secret Service API" +optional = true +python-versions = ">=3.6" +files = [ {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, ] -semver = [ + +[package.dependencies] +cryptography = ">=2.0" +jeepney = ">=0.6" + +[[package]] +name = "semver" +version = "3.0.1" +description = "Python helper for Semantic Versioning (https://semver.org)" +optional = false +python-versions = ">=3.7" +files = [ {file = "semver-3.0.1-py3-none-any.whl", hash = "sha256:2a23844ba1647362c7490fe3995a86e097bb590d16f0f32dfc383008f19e4cdf"}, {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, ] -sentry-sdk = [ + +[[package]] +name = "sentry-sdk" +version = "1.30.0" +description = "Python client for Sentry (https://sentry.io)" +optional = false +python-versions = "*" +files = [ {file = "sentry-sdk-1.30.0.tar.gz", hash = "sha256:7dc873b87e1faf4d00614afd1058bfa1522942f33daef8a59f90de8ed75cd10c"}, {file = "sentry_sdk-1.30.0-py2.py3-none-any.whl", hash = "sha256:2e53ad63f96bb9da6570ba2e755c267e529edcf58580a2c0d2a11ef26e1e678b"}, ] -setproctitle = [ + +[package.dependencies] +certifi = "*" +urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} + +[package.extras] +aiohttp = ["aiohttp (>=3.5)"] +arq = ["arq (>=0.23)"] +beam = ["apache-beam (>=2.12)"] +bottle = ["bottle (>=0.12.13)"] +celery = ["celery (>=3)"] +chalice = ["chalice (>=1.16.0)"] +django = ["django (>=1.8)"] +falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] +grpcio = 
["grpcio (>=1.21.1)"] +httpx = ["httpx (>=0.16.0)"] +huey = ["huey (>=2)"] +loguru = ["loguru (>=0.5)"] +opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] +pure-eval = ["asttokens", "executing", "pure-eval"] +pymongo = ["pymongo (>=3.1)"] +pyspark = ["pyspark (>=2.4.4)"] +quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] +rq = ["rq (>=0.6)"] +sanic = ["sanic (>=0.8)"] +sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] +starlite = ["starlite (>=1.48)"] +tornado = ["tornado (>=5)"] + +[[package]] +name = "setproctitle" +version = "1.3.2" +description = "A Python module to customize the process title" +optional = false +python-versions = ">=3.7" +files = [ {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:288943dec88e178bb2fd868adf491197cc0fc8b6810416b1c6775e686bab87fe"}, {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:630f6fe5e24a619ccf970c78e084319ee8be5be253ecc9b5b216b0f474f5ef18"}, {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c877691b90026670e5a70adfbcc735460a9f4c274d35ec5e8a43ce3f8443005"}, @@ -8351,11 +7667,33 @@ setproctitle = [ {file = "setproctitle-1.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7aa0aac1711fadffc1d51e9d00a3bea61f68443d6ac0241a224e4d622489d665"}, {file = "setproctitle-1.3.2.tar.gz", hash = "sha256:b9fb97907c830d260fa0658ed58afd48a86b2b88aac521135c352ff7fd3477fd"}, ] -setuptools = [ + +[package.extras] +test = ["pytest"] + +[[package]] +name = "setuptools" +version = "68.1.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"}, {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"}, ] -simplejson = [ + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "simplejson" +version = "3.19.1" +description = "Simple, fast, extensible JSON 
encoder/decoder for Python" +optional = false +python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "simplejson-3.19.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:412e58997a30c5deb8cab5858b8e2e5b40ca007079f7010ee74565cc13d19665"}, {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e765b1f47293dedf77946f0427e03ee45def2862edacd8868c6cf9ab97c8afbd"}, {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:3231100edee292da78948fa0a77dee4e5a94a0a60bcba9ed7a9dc77f4d4bb11e"}, @@ -8442,19 +7780,47 @@ simplejson = [ {file = "simplejson-3.19.1-py3-none-any.whl", hash = "sha256:4710806eb75e87919b858af0cba4ffedc01b463edc3982ded7b55143f39e41e1"}, {file = "simplejson-3.19.1.tar.gz", hash = "sha256:6277f60848a7d8319d27d2be767a7546bc965535b28070e310b3a9af90604a4c"}, ] -six = [ + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -smmap = [ + +[[package]] +name = "smmap" +version = "5.0.0" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.6" +files = [ {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, ] -sniffio = [ + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] -snowflake-connector-python = [ + +[[package]] +name = "snowflake-connector-python" +version = "3.5.0" +description = "Snowflake Connector for Python" +optional = true +python-versions = ">=3.8" +files = [ {file = "snowflake-connector-python-3.5.0.tar.gz", hash = "sha256:654e4a1f68a491544bd8f7c5ab02eb8531df67c5f4309d5253bd204044f8a1b3"}, {file = "snowflake_connector_python-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a365fa4f23be27a4a46d04f73a48ccb1ddad5b9558f100ba592a49571c90a33c"}, {file = "snowflake_connector_python-3.5.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5b648b8f32aa540e9adf14e84ea5d77a6c3c6cbc3cbcf172622a0b8db0e99384"}, @@ -8477,19 +7843,66 @@ snowflake-connector-python = [ {file = "snowflake_connector_python-3.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee97a8ac0aaf40a7b7420c8936a66d8d33376cd40498ac3d38efa7bb5712d14a"}, {file = "snowflake_connector_python-3.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8cd747e2719ba44dd2ce0e9b1e6f8b03485b2b335a352f3b45138b56fad5888"}, ] -sortedcontainers = [ + +[package.dependencies] +asn1crypto = ">0.24.0,<2.0.0" +certifi = ">=2017.4.17" +cffi = ">=1.9,<2.0.0" +charset-normalizer = ">=2,<4" +cryptography = ">=3.1.0,<42.0.0" +filelock = ">=3.5,<4" +idna = ">=2.5,<4" +keyring = {version = "<16.1.0 || >16.1.0,<25.0.0", optional = true, markers = "extra == \"secure-local-storage\""} 
+packaging = "*" +platformdirs = ">=2.6.0,<4.0.0" +pyjwt = "<3.0.0" +pyOpenSSL = ">=16.2.0,<24.0.0" +pytz = "*" +requests = "<3.0.0" +sortedcontainers = ">=2.4.0" +tomlkit = "*" +typing-extensions = ">=4.3,<5" +urllib3 = ">=1.21.1,<2.0.0" + +[package.extras] +development = ["Cython", "coverage", "more-itertools", "numpy (<1.27.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<7.5.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytzdata"] +pandas = ["pandas (>=1.0.0,<2.1.0)", "pyarrow"] +secure-local-storage = ["keyring (!=16.1.0,<25.0.0)"] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = true +python-versions = "*" +files = [ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] -soupsieve = [ + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = true +python-versions = ">=3.8" +files = [ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] -sqlalchemy = [ + +[[package]] +name = "sqlalchemy" +version = "1.4.49" +description = "Database Abstraction Library" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ {file = "SQLAlchemy-1.4.49-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e126cf98b7fd38f1e33c64484406b78e937b1a280e078ef558b95bf5b6895f6"}, {file = "SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca46de16650d143a928d10842939dab208e8d8c3a9a8757600cae9b7c579c5cd"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, @@ -8499,84 +7912,280 @@ sqlalchemy = [ {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, {file = 
"SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f23755c384c2969ca2f7667a83f7c5648fcf8b62a3f2bbd883d805454964a800"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8396e896e08e37032e87e7fbf4a15f431aa878c286dc7f79e616c2feacdb366c"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66da9627cfcc43bbdebd47bfe0145bb662041472393c03b7802253993b6b7c90"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-win32.whl", hash = "sha256:9a06e046ffeb8a484279e54bda0a5abfd9675f594a2e38ef3133d7e4d75b6214"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-win_amd64.whl", hash = "sha256:7cf8b90ad84ad3a45098b1c9f56f2b161601e4670827d6b892ea0e884569bd1d"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc22807a7e161c0d8f3da34018ab7c97ef6223578fcdd99b1d3e7ed1100a5db"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:393cd06c3b00b57f5421e2133e088df9cabcececcea180327e43b937b5a7caa5"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = "sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = 
"sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95ab792ca493891d7a45a077e35b418f68435efb3e1706cb8155e20e86a9013c"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:738d7321212941ab19ba2acf02a68b8ee64987b248ffa2101630e8fccb549e0d"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-win_amd64.whl", hash = "sha256:bbdf16372859b8ed3f4d05f925a984771cd2abd18bd187042f24be4886c2a15f"}, {file = "SQLAlchemy-1.4.49.tar.gz", hash = "sha256:06ff25cbae30c396c4b7737464f2a7fc37a67b7da409993b182b024cec80aed9"}, ] -sqlalchemy-jsonfield = [ + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} + +[package.extras] +aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] +mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)", "cx-oracle (>=7,<8)"] +postgresql = ["psycopg2 (>=2.7)"] 
+postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql", "pymysql (<1)"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "sqlalchemy-jsonfield" +version = "1.0.1.post0" +description = "SQLALchemy JSONField implementation for storing dicts at SQL" +optional = false +python-versions = ">=3.7.0" +files = [ {file = "SQLAlchemy-JSONField-1.0.1.post0.tar.gz", hash = "sha256:72a5e714fe0493d2660abd7484a9fd9f492f493a0856288dd22a5decb29f5dc4"}, {file = "SQLAlchemy_JSONField-1.0.1.post0-py3-none-any.whl", hash = "sha256:d6f1e5ee329a3c0d9d164e40d81a2143ac8332e09988fbbaff84179dac5503d4"}, ] -sqlalchemy-utils = [ + +[package.dependencies] +sqlalchemy = "*" + +[[package]] +name = "sqlalchemy-utils" +version = "0.41.1" +description = "Various utility functions for SQLAlchemy." +optional = false +python-versions = ">=3.6" +files = [ {file = "SQLAlchemy-Utils-0.41.1.tar.gz", hash = "sha256:a2181bff01eeb84479e38571d2c0718eb52042f9afd8c194d0d02877e84b7d74"}, {file = "SQLAlchemy_Utils-0.41.1-py3-none-any.whl", hash = "sha256:6c96b0768ea3f15c0dc56b363d386138c562752b84f647fb8d31a2223aaab801"}, ] -sqlfluff = [ + +[package.dependencies] +SQLAlchemy = ">=1.3" + +[package.extras] +arrow = ["arrow (>=0.3.4)"] +babel = ["Babel (>=1.3)"] +color = ["colour (>=0.0.4)"] +encrypted = ["cryptography (>=0.6)"] +intervals = ["intervals (>=0.7.1)"] +password = ["passlib (>=1.6,<2.0)"] +pendulum = ["pendulum (>=2.0.5)"] +phone = ["phonenumbers (>=5.9.2)"] +test = ["Jinja2 (>=2.3)", "Pygments (>=1.2)", "backports.zoneinfo", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "isort (>=4.2.2)", "pg8000 (>=1.12.4)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (>=2.7.1)", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] +test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3.4)", "backports.zoneinfo", "colour (>=0.0.4)", "cryptography (>=0.6)", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "furl (>=0.4.1)", "intervals (>=0.7.1)", "isort (>=4.2.2)", "passlib (>=1.6,<2.0)", "pendulum (>=2.0.5)", "pg8000 (>=1.12.4)", "phonenumbers (>=5.9.2)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (>=2.7.1)", "python-dateutil", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] +timezone = ["python-dateutil"] +url = ["furl (>=0.4.1)"] + +[[package]] +name = "sqlfluff" +version = "2.3.2" +description = "The SQL Linter for Humans" +optional = false +python-versions = ">=3.7" +files = [ {file = "sqlfluff-2.3.2-py3-none-any.whl", hash = "sha256:85c8b683e283ff632fe28529ddb60585ea2d1d3c614fc7a1db171632b99dcce3"}, {file = "sqlfluff-2.3.2.tar.gz", hash = "sha256:3403ce7e9133766d7336b7e26638657ec6cc9e5610e35186b7f02cc427dd49b7"}, ] -sqlparams = [ + +[package.dependencies] +appdirs = "*" +chardet = "*" +click = "*" +colorama = ">=0.3" +diff-cover = ">=2.5.0" +importlib-resources = {version = "*", markers = "python_version < \"3.9\""} +Jinja2 = "*" +pathspec = "*" +pytest = "*" +pyyaml = ">=5.1" +regex = "*" +tblib = "*" +toml = {version = "*", markers = "python_version < \"3.11\""} +tqdm = "*" +typing-extensions = "*" + +[[package]] +name = "sqlparams" +version = "6.0.1" +description = "Convert between various DB API 2.0 parameter styles." 
+optional = true +python-versions = ">=3.8" +files = [ {file = "sqlparams-6.0.1-py3-none-any.whl", hash = "sha256:566651376315c832876be4a0f58ffa23a23fab257d77ee492bdf8d301e169d0d"}, {file = "sqlparams-6.0.1.tar.gz", hash = "sha256:032b2f949d4afbcbfa24003f6fb407f2fc8468184e3d8ca3d59ba6b30d4935bf"}, ] -sqlparse = [ + +[[package]] +name = "sqlparse" +version = "0.4.4" +description = "A non-validating SQL parser." +optional = false +python-versions = ">=3.5" +files = [ {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, ] -stevedore = [ + +[package.extras] +dev = ["build", "flake8"] +doc = ["sphinx"] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "stevedore" +version = "5.1.0" +description = "Manage dynamic plugins for Python applications" +optional = false +python-versions = ">=3.8" +files = [ {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, ] -sympy = [ + +[package.dependencies] +pbr = ">=2.0.0,<2.1.0 || >2.1.0" + +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = true +python-versions = ">=3.8" +files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, ] -tabulate = [ + +[package.dependencies] +mpmath = ">=0.19" + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, ] -tblib = [ + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tblib" +version = "2.0.0" +description = "Traceback serialization library." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "tblib-2.0.0-py3-none-any.whl", hash = "sha256:9100bfa016b047d5b980d66e7efed952fbd20bd85b56110aaf473cb97d18709a"}, {file = "tblib-2.0.0.tar.gz", hash = "sha256:a6df30f272c08bf8be66e0775fad862005d950a6b8449b94f7c788731d70ecd7"}, ] -tenacity = [ + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, ] -termcolor = [ + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "termcolor" +version = "2.3.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.7" +files = [ {file = "termcolor-2.3.0-py3-none-any.whl", hash = "sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475"}, {file = "termcolor-2.3.0.tar.gz", hash = "sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a"}, ] -text-unidecode = [ + +[package.extras] +tests = ["pytest", "pytest-cov"] + +[[package]] +name = "text-unidecode" +version = "1.3" +description = "The most basic Text::Unidecode port" +optional = false +python-versions = "*" +files = [ {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, ] -thrift = [ + +[[package]] +name = "thrift" +version = "0.16.0" +description = "Python bindings for the Apache Thrift RPC system" +optional = true +python-versions = "*" +files = [ {file = "thrift-0.16.0.tar.gz", hash = "sha256:2b5b6488fcded21f9d312aa23c9ff6a0195d0f6ae26ddbd5ad9e3e25dfc14408"}, ] -tokenizers = [ + +[package.dependencies] +six = ">=1.7.2" + +[package.extras] +all = ["tornado (>=4.0)", "twisted"] +tornado = ["tornado (>=4.0)"] +twisted = ["twisted"] + +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +optional = true +python-versions = "*" +files = [ {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, @@ -8618,122 +8227,360 @@ tokenizers = [ {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, ] -toml = [ + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = 
"toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] -tomli = [ + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -tomli-w = [ + +[[package]] +name = "tomli-w" +version = "1.0.0" +description = "A lil' TOML writer" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomli_w-1.0.0-py3-none-any.whl", hash = "sha256:9f2a07e8be30a0729e533ec968016807069991ae2fd921a78d42f429ae5f4463"}, {file = "tomli_w-1.0.0.tar.gz", hash = "sha256:f463434305e0336248cac9c2dc8076b707d8a12d019dd349f5c1e382dd1ae1b9"}, ] -tomlkit = [ + +[[package]] +name = "tomlkit" +version = "0.12.1" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, ] -tqdm = [ + +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] -typeapi = [ + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typeapi" +version = "2.1.1" +description = "" +optional = false +python-versions = ">=3.6.3,<4.0.0" +files = [ {file = "typeapi-2.1.1-py3-none-any.whl", hash = "sha256:ef41577f316bfd362572e727ba349dab80a7362318a80fc72e6a807017d04c5c"}, {file = "typeapi-2.1.1.tar.gz", hash = "sha256:49b3c1d3382e27dccbb59132a3a823c61954f679a0c61f119fd6d8470073a298"}, ] -types-awscrt = [ + +[package.dependencies] +typing-extensions = ">=3.0.0" + +[[package]] +name = "types-awscrt" +version = "0.19.1" +description = "Type annotations and code completion for awscrt" +optional = false +python-versions = ">=3.7,<4.0" +files = [ {file = "types_awscrt-0.19.1-py3-none-any.whl", hash = "sha256:68fffeb75396e9e7614cd930b2d52295f680230774750907bcafb56f11514043"}, {file = "types_awscrt-0.19.1.tar.gz", hash = "sha256:61833aa140e724a9098025610f4b8cde3dcf65b842631d7447378f9f5db4e1fd"}, ] -types-cachetools = [ + +[[package]] +name = "types-cachetools" +version = "5.3.0.6" +description = "Typing stubs for cachetools" +optional = false +python-versions = "*" +files = [ {file = "types-cachetools-5.3.0.6.tar.gz", hash = "sha256:595f0342d246c8ba534f5a762cf4c2f60ecb61e8002b8b2277fd5cf791d4e851"}, {file = "types_cachetools-5.3.0.6-py3-none-any.whl", hash = "sha256:f7f8a25bfe306f2e6bc2ad0a2f949d9e72f2d91036d509c36d3810bf728bc6e1"}, ] -types-click = [ + +[[package]] +name = "types-click" +version = "7.1.8" +description = 
"Typing stubs for click" +optional = false +python-versions = "*" +files = [ {file = "types-click-7.1.8.tar.gz", hash = "sha256:b6604968be6401dc516311ca50708a0a28baa7a0cb840efd7412f0dbbff4e092"}, {file = "types_click-7.1.8-py3-none-any.whl", hash = "sha256:8cb030a669e2e927461be9827375f83c16b8178c365852c060a34e24871e7e81"}, ] -types-deprecated = [ + +[[package]] +name = "types-deprecated" +version = "1.2.9.3" +description = "Typing stubs for Deprecated" +optional = false +python-versions = "*" +files = [ {file = "types-Deprecated-1.2.9.3.tar.gz", hash = "sha256:ef87327adf3e3c4a4c7d8e06e58f6476710d3466ecfb53c49efb080804a70ef3"}, {file = "types_Deprecated-1.2.9.3-py3-none-any.whl", hash = "sha256:24da9210763e5e1b3d0d4f6f8bba9ad3bb6af3fe7f6815fc37e3ede4681704f5"}, ] -types-protobuf = [ + +[[package]] +name = "types-protobuf" +version = "4.24.0.1" +description = "Typing stubs for protobuf" +optional = false +python-versions = "*" +files = [ {file = "types-protobuf-4.24.0.1.tar.gz", hash = "sha256:90adea3b693d6a40d8ef075c58fe6b5cc6e01fe1496301a7e6fc70398dcff92e"}, {file = "types_protobuf-4.24.0.1-py3-none-any.whl", hash = "sha256:df203a204e4ae97d4cca4c9cf725262579dd7857a19f9e7fc74871ccfa073c01"}, ] -types-psutil = [ + +[[package]] +name = "types-psutil" +version = "5.9.5.16" +description = "Typing stubs for psutil" +optional = false +python-versions = "*" +files = [ {file = "types-psutil-5.9.5.16.tar.gz", hash = "sha256:4e9b219efb625d3d04f6bf106934f87cab49aa41a94b0a3b3089403f47a79228"}, {file = "types_psutil-5.9.5.16-py3-none-any.whl", hash = "sha256:fec713104d5d143afea7b976cfa691ca1840f5d19e8714a5d02a96ebd061363e"}, ] -types-psycopg2 = [ + +[[package]] +name = "types-psycopg2" +version = "2.9.21.14" +description = "Typing stubs for psycopg2" +optional = false +python-versions = "*" +files = [ {file = "types-psycopg2-2.9.21.14.tar.gz", hash = "sha256:bf73a0ac4da4e278c89bf1b01fc596d5a5ac7a356cfe6ac0249f47b9e259f868"}, {file = "types_psycopg2-2.9.21.14-py3-none-any.whl", hash = "sha256:cd9c5350631f3bc6184ec8d48f2ed31d4ea660f89d0fffe78239450782f383c5"}, ] -types-python-dateutil = [ + +[[package]] +name = "types-python-dateutil" +version = "2.8.19.14" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = "*" +files = [ {file = "types-python-dateutil-2.8.19.14.tar.gz", hash = "sha256:1f4f10ac98bb8b16ade9dbee3518d9ace017821d94b057a425b069f834737f4b"}, {file = "types_python_dateutil-2.8.19.14-py3-none-any.whl", hash = "sha256:f977b8de27787639986b4e28963263fd0e5158942b3ecef91b9335c130cb1ce9"}, ] -types-pyyaml = [ + +[[package]] +name = "types-pyyaml" +version = "6.0.12.11" +description = "Typing stubs for PyYAML" +optional = false +python-versions = "*" +files = [ {file = "types-PyYAML-6.0.12.11.tar.gz", hash = "sha256:7d340b19ca28cddfdba438ee638cd4084bde213e501a3978738543e27094775b"}, {file = "types_PyYAML-6.0.12.11-py3-none-any.whl", hash = "sha256:a461508f3096d1d5810ec5ab95d7eeecb651f3a15b71959999988942063bf01d"}, ] -types-requests = [ + +[[package]] +name = "types-requests" +version = "2.31.0.2" +description = "Typing stubs for requests" +optional = false +python-versions = "*" +files = [ {file = "types-requests-2.31.0.2.tar.gz", hash = "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40"}, {file = "types_requests-2.31.0.2-py3-none-any.whl", hash = "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a"}, ] -types-s3transfer = [ + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = "types-s3transfer" +version = 
"0.6.2" +description = "Type annotations and code completion for s3transfer" +optional = false +python-versions = ">=3.7,<4.0" +files = [ {file = "types_s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:1068877b6e59be5226fa3006ae64371ac9d5bc590dfdbd9c66fd0a075d3254ac"}, {file = "types_s3transfer-0.6.2.tar.gz", hash = "sha256:4ba9b483796fdcd026aa162ee03bdcedd2bf7d08e9387c820dcdd158b0102057"}, ] -types-setuptools = [ + +[[package]] +name = "types-setuptools" +version = "68.1.0.1" +description = "Typing stubs for setuptools" +optional = false +python-versions = "*" +files = [ {file = "types-setuptools-68.1.0.1.tar.gz", hash = "sha256:271ed8da44885cd9a701c86e48cc6d3cc988052260e72b3ce26c26b3028f86ed"}, {file = "types_setuptools-68.1.0.1-py3-none-any.whl", hash = "sha256:a9a0d2ca1da8a15924890d464adcee4004deb07b6a99bd0b1881eac5c73cb3a7"}, ] -types-simplejson = [ + +[[package]] +name = "types-simplejson" +version = "3.19.0.2" +description = "Typing stubs for simplejson" +optional = false +python-versions = "*" +files = [ {file = "types-simplejson-3.19.0.2.tar.gz", hash = "sha256:ebc81f886f89d99d6b80c726518aa2228bc77c26438f18fd81455e4f79f8ee1b"}, {file = "types_simplejson-3.19.0.2-py3-none-any.whl", hash = "sha256:8ba093dc7884f59b3e62aed217144085e675a269debc32678fd80e0b43b2b86f"}, ] -types-sqlalchemy = [ + +[[package]] +name = "types-sqlalchemy" +version = "1.4.53.38" +description = "Typing stubs for SQLAlchemy" +optional = false +python-versions = "*" +files = [ {file = "types-SQLAlchemy-1.4.53.38.tar.gz", hash = "sha256:5bb7463537e04e1aa5a3557eb725930df99226dcfd3c9bf93008025bfe5c169e"}, {file = "types_SQLAlchemy-1.4.53.38-py3-none-any.whl", hash = "sha256:7e60e74f823931cc9a9e8adb0a4c05e5533e6708b8a266807893a739faf4eaaa"}, ] -types-tqdm = [ + +[[package]] +name = "types-tqdm" +version = "4.66.0.2" +description = "Typing stubs for tqdm" +optional = false +python-versions = "*" +files = [ {file = "types-tqdm-4.66.0.2.tar.gz", hash = "sha256:9553a5e44c1d485fce19f505b8bd65c0c3e87e870678d1f2ed764ae59a55d45f"}, {file = "types_tqdm-4.66.0.2-py3-none-any.whl", hash = "sha256:13dddd38908834abdf0acdc2b70cab7ac4bcc5ad7356ced450471662e58a0ffc"}, ] -types-urllib3 = [ + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, ] -typing-extensions = [ + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] -tzdata = [ + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] -uc-micro-py = [ + +[[package]] +name = "uc-micro-py" 
+version = "1.0.2" +description = "Micro subset of unicode data files for linkify-it-py projects." +optional = false +python-versions = ">=3.7" +files = [ {file = "uc-micro-py-1.0.2.tar.gz", hash = "sha256:30ae2ac9c49f39ac6dce743bd187fcd2b574b16ca095fa74cd9396795c954c54"}, {file = "uc_micro_py-1.0.2-py3-none-any.whl", hash = "sha256:8c9110c309db9d9e87302e2f4ad2c3152770930d88ab385cd544e7a7e75f3de0"}, ] -unicodecsv = [ + +[package.extras] +test = ["coverage", "pytest", "pytest-cov"] + +[[package]] +name = "unicodecsv" +version = "0.14.1" +description = "Python2's stdlib csv module is nice, but it doesn't support unicode. This module is a drop-in replacement which *does*." +optional = false +python-versions = "*" +files = [ {file = "unicodecsv-0.14.1.tar.gz", hash = "sha256:018c08037d48649a0412063ff4eda26eaa81eff1546dbffa51fa5293276ff7fc"}, ] -uritemplate = [ + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] -urllib3 = [ + +[[package]] +name = "urllib3" +version = "1.26.16" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, ] -validators = [ + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "validators" +version = "0.21.0" +description = "Python Data Validation for Humans™" +optional = true +python-versions = ">=3.8,<4.0" +files = [ {file = "validators-0.21.0-py3-none-any.whl", hash = "sha256:3470db6f2384c49727ee319afa2e97aec3f8fad736faa6067e0fd7f9eaf2c551"}, {file = "validators-0.21.0.tar.gz", hash = "sha256:245b98ab778ed9352a7269c6a8f6c2a839bed5b2a7e3e60273ce399d247dd4b3"}, ] -watchdog = [ + +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.7" +files = [ {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, @@ -8762,23 +8609,79 @@ watchdog = [ {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, ] -wcwidth = [ + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[[package]] +name = "wcwidth" +version = "0.2.6" +description = "Measures the displayed 
width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ {file = "wcwidth-0.2.6-py2.py3-none-any.whl", hash = "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e"}, {file = "wcwidth-0.2.6.tar.gz", hash = "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0"}, ] -weaviate-client = [ + +[[package]] +name = "weaviate-client" +version = "3.23.2" +description = "A python native Weaviate client" +optional = true +python-versions = ">=3.8" +files = [ {file = "weaviate-client-3.23.2.tar.gz", hash = "sha256:1c8c94df032dd2fa5a4ea615fc69ccb983ffad5cc02974f78c793839e61ac150"}, {file = "weaviate_client-3.23.2-py3-none-any.whl", hash = "sha256:88ffc38cca07806d64726cc74bc194c7da50b222aa4e2cd129f4c1f5e53e9b61"}, ] -werkzeug = [ + +[package.dependencies] +authlib = ">=1.1.0" +requests = ">=2.28.0,<=2.31.0" +tqdm = ">=4.59.0,<5.0.0" +validators = ">=0.18.2,<=0.21.0" + +[package.extras] +grpc = ["grpcio", "grpcio-tools"] + +[[package]] +name = "werkzeug" +version = "2.3.7" +description = "The comprehensive WSGI web application library." +optional = false +python-versions = ">=3.8" +files = [ {file = "werkzeug-2.3.7-py3-none-any.whl", hash = "sha256:effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528"}, {file = "werkzeug-2.3.7.tar.gz", hash = "sha256:2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8"}, ] -wheel = [ + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + +[[package]] +name = "wheel" +version = "0.41.2" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, ] -win-precise-time = [ + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "win-precise-time" +version = "1.4.2" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = "sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, {file = "win_precise_time-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:bb8e44b0fc35fde268e8a781cdcd9f47d47abcd8089465d2d1d1063976411c8e"}, @@ -8793,7 +8696,14 @@ win-precise-time = [ {file = "win_precise_time-1.4.2-cp39-cp39-win32.whl", hash = "sha256:50d11a6ff92e1be96a8d4bee99ff6dc07a0ea0e2a392b0956bb2192e334f41ba"}, {file = "win_precise_time-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f510fa92d9c39ea533c983e1d62c7bc66fdf0a3e3c3bdda48d4ebb634ff7034"}, ] -wrapt = [ + +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, @@ -8870,15 +8780,45 @@ wrapt = [ {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, ] -wtforms = [ + +[[package]] +name = "wtforms" +version = "3.0.1" +description = "Form validation and rendering for Python web development." +optional = false +python-versions = ">=3.7" +files = [ {file = "WTForms-3.0.1-py3-none-any.whl", hash = "sha256:837f2f0e0ca79481b92884962b914eba4e72b7a2daaf1f939c890ed0124b834b"}, {file = "WTForms-3.0.1.tar.gz", hash = "sha256:6b351bbb12dd58af57ffef05bc78425d08d1914e0fd68ee14143b7ade023c5bc"}, ] -yapf = [ + +[package.dependencies] +MarkupSafe = "*" + +[package.extras] +email = ["email-validator"] + +[[package]] +name = "yapf" +version = "0.33.0" +description = "A formatter for Python code." +optional = false +python-versions = "*" +files = [ {file = "yapf-0.33.0-py2.py3-none-any.whl", hash = "sha256:4c2b59bd5ffe46f3a7da48df87596877189148226ce267c16e8b44240e51578d"}, {file = "yapf-0.33.0.tar.gz", hash = "sha256:da62bdfea3df3673553351e6246abed26d9fe6780e548a5af9e70f6d2b4f5b9a"}, ] -yarl = [ + +[package.dependencies] +tomli = ">=2.0.1" + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, @@ -8954,7 +8894,49 @@ yarl = [ {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, ] -zipp = [ + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, ] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[extras] +athena = ["botocore", 
"pyarrow", "pyathena", "s3fs"] +az = ["adlfs"] +bigquery = ["gcsfs", "google-cloud-bigquery", "grpcio", "pyarrow"] +cli = ["cron-descriptor", "pipdeptree"] +databricks = ["databricks-sql-connector"] +dbt = ["dbt-athena-community", "dbt-bigquery", "dbt-core", "dbt-databricks", "dbt-duckdb", "dbt-redshift", "dbt-snowflake"] +duckdb = ["duckdb"] +filesystem = ["botocore", "s3fs"] +gcp = ["gcsfs", "google-cloud-bigquery", "grpcio"] +gs = ["gcsfs"] +motherduck = ["duckdb", "pyarrow"] +mssql = ["pyodbc"] +parquet = ["pyarrow"] +postgres = ["psycopg2-binary", "psycopg2cffi"] +qdrant = ["qdrant-client"] +redshift = ["psycopg2-binary", "psycopg2cffi"] +s3 = ["botocore", "s3fs"] +snowflake = ["snowflake-connector-python"] +synapse = ["adlfs", "pyarrow", "pyodbc"] +weaviate = ["weaviate-client"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<3.13" +content-hash = "4c9aa9a9d0059390f52f96bb8b5cf20a555ed54e4d7b8ac8bcb1ffefd85808f5" diff --git a/pyproject.toml b/pyproject.toml index af77df148c..440df139dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_i grpcio = {version = ">=1.50.0", optional = true} google-cloud-bigquery = {version = ">=2.26.0", optional = true} pyarrow = {version = ">=12.0.0", optional = true} -duckdb = {version = ">=0.6.1,<0.10.0", optional = true} +duckdb = {version = ">=0.6.1,<0.11.0", optional = true} dbt-core = {version = ">=1.2.0", optional = true} dbt-redshift = {version = ">=1.2.0", optional = true} dbt-bigquery = {version = ">=1.2.0", optional = true} @@ -106,6 +106,10 @@ databricks = ["databricks-sql-connector"] dlt = "dlt.cli._dlt:_main" [tool.poetry.group.dev.dependencies] +cffi = "^1.16" +greenlet = "^3.0.3" +regex = "^2023.10" +pendulum = ">=3" requests-mock = "^1.10.0" types-click = "^7.1.8" sqlfluff = "^2.3.2" @@ -148,7 +152,10 @@ tqdm = "^4.65.0" enlighten = "^1.11.2" alive-progress = "^3.1.1" pydantic = ">2" -pandas = ">2" +pandas = [ + {version = ">2.1", markers = "python_version >= '3.9'"}, + {version = "<2.1", markers = "python_version < '3.9'"} +] [tool.poetry.group.airflow] optional = true From 83dc38a46be9487e042c3d5ce0efbbe9028023eb Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 01:17:06 +0100 Subject: [PATCH 005/105] adds py 12 common tests --- .github/workflows/test_common.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index dd7c7db2fe..6ec1212d1a 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -32,6 +32,8 @@ jobs: os: "ubuntu-latest" - python-version: "3.10.x" os: "ubuntu-latest" + - python-version: "3.12.x" + os: "ubuntu-latest" defaults: run: From 21ebfee642ca7e738d87300de07c5bbc1a5b9e6b Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 01:17:37 +0100 Subject: [PATCH 006/105] fixes typings after deps bump --- dlt/common/data_types/type_helpers.py | 6 +++--- dlt/common/time.py | 8 +++++--- dlt/pipeline/pipeline.py | 2 +- dlt/pipeline/track.py | 2 +- pytest.ini | 2 +- tests/common/schema/test_detections.py | 3 ++- tests/common/storages/test_load_storage.py | 2 +- tests/common/storages/utils.py | 2 +- 8 files changed, 15 insertions(+), 12 deletions(-) diff --git a/dlt/common/data_types/type_helpers.py b/dlt/common/data_types/type_helpers.py index 9e1cd2278d..a721c9e0bc 100644 --- a/dlt/common/data_types/type_helpers.py +++ b/dlt/common/data_types/type_helpers.py @@ -81,13 +81,13 @@ def 
coerce_from_date_types( if to_type == "text": return v.isoformat() if to_type == "bigint": - return v.int_timestamp # type: ignore + return v.int_timestamp if to_type == "double": - return v.timestamp() # type: ignore + return v.timestamp() if to_type == "date": return ensure_pendulum_date(v) if to_type == "time": - return v.time() # type: ignore[no-any-return] + return v.time() raise TypeError(f"Cannot convert timestamp to {to_type}") diff --git a/dlt/common/time.py b/dlt/common/time.py index 4f4dd05ef0..c06e2e2581 100644 --- a/dlt/common/time.py +++ b/dlt/common/time.py @@ -58,7 +58,9 @@ def parse_iso_like_datetime(value: Any) -> Union[pendulum.DateTime, pendulum.Dat return pendulum.time(dtv.hour, dtv.minute, dtv.second, dtv.microsecond) if isinstance(dtv, datetime.datetime): return pendulum.instance(dtv) - return pendulum.date(dtv.year, dtv.month, dtv.day) + if isinstance(dtv, pendulum.Duration): + raise ValueError("Interval ISO 8601 not supported: " + value) + return pendulum.date(dtv.year, dtv.month, dtv.day) # type: ignore[union-attr] def ensure_pendulum_date(value: TAnyDateTime) -> pendulum.Date: @@ -75,7 +77,7 @@ def ensure_pendulum_date(value: TAnyDateTime) -> pendulum.Date: if isinstance(value, datetime.datetime): # both py datetime and pendulum datetime are handled here value = pendulum.instance(value) - return value.in_tz(UTC).date() # type: ignore + return value.in_tz(UTC).date() elif isinstance(value, datetime.date): return pendulum.date(value.year, value.month, value.day) elif isinstance(value, (int, float, str)): @@ -83,7 +85,7 @@ def ensure_pendulum_date(value: TAnyDateTime) -> pendulum.Date: if isinstance(result, datetime.time): raise ValueError(f"Cannot coerce {value} to a pendulum.DateTime object.") if isinstance(result, pendulum.DateTime): - return result.in_tz(UTC).date() # type: ignore + return result.in_tz(UTC).date() return pendulum.date(result.year, result.month, result.day) raise TypeError(f"Cannot coerce {value} to a pendulum.DateTime object.") diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 73c8f076d1..242c2e9a05 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -1314,7 +1314,7 @@ def _set_default_schema_name(self, schema: Schema) -> None: self.default_schema_name = schema.name def _create_pipeline_instance_id(self) -> str: - return pendulum.now().format("_YYYYMMDDhhmmss") # type: ignore + return pendulum.now().format("_YYYYMMDDhhmmss") @with_schemas_sync @with_state_sync() diff --git a/dlt/pipeline/track.py b/dlt/pipeline/track.py index 9a9deee017..990c59050e 100644 --- a/dlt/pipeline/track.py +++ b/dlt/pipeline/track.py @@ -36,7 +36,7 @@ def slack_notify_load_success(incoming_hook: str, load_info: LoadInfo, trace: Pi if author: author = f":hard-hat:{author}'s " - total_elapsed = pendulum.now() - trace.started_at + total_elapsed = pendulum.now().diff(trace.started_at) def _get_step_elapsed(step: PipelineStepTrace) -> str: if not step: diff --git a/pytest.ini b/pytest.ini index 81f5451239..0f9f6ab0d8 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -pythonpath=dlt +pythonpath= dlt docs/website/docs norecursedirs= .direnv .eggs build dist addopts= -v --showlocals --durations 10 xfail_strict= true diff --git a/tests/common/schema/test_detections.py b/tests/common/schema/test_detections.py index cba2767c94..61ce0ede45 100644 --- a/tests/common/schema/test_detections.py +++ b/tests/common/schema/test_detections.py @@ -27,7 +27,8 @@ def test_iso_timestamp_detection() -> None: assert is_iso_timestamp(str, 
str(pendulum.now())) == "timestamp" assert is_iso_timestamp(str, "1975-05-21T22:00:00Z") == "timestamp" assert is_iso_timestamp(str, "2022-06-01T00:48:35.040Z") == "timestamp" - assert is_iso_timestamp(str, "1975-0521T22:00:00Z") == "timestamp" + # newer pendulum does not accept this format 🤷 + # assert is_iso_timestamp(str, "1975-0521T22:00:00Z") == "timestamp" assert is_iso_timestamp(str, "2021-07-24 10:51") == "timestamp" # dates and times are not accepted assert is_iso_timestamp(str, "1975-05-21") is None diff --git a/tests/common/storages/test_load_storage.py b/tests/common/storages/test_load_storage.py index e239ec30a2..0fe112581e 100644 --- a/tests/common/storages/test_load_storage.py +++ b/tests/common/storages/test_load_storage.py @@ -97,7 +97,7 @@ def test_complete_package_failed_jobs(load_storage: LoadStorage) -> None: assert failed_info[0].job_file_info.table_name == "mock_table" # a few stats assert failed_info[0].file_size == 32 - assert (pendulum.now() - failed_info[0].created_at).seconds < 2 + assert (pendulum.now().diff(failed_info[0].created_at)).seconds < 2 assert failed_info[0].elapsed < 2 package_info = load_storage.get_load_package_info(load_id) diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py index 02258853f1..3158e84c24 100644 --- a/tests/common/storages/utils.py +++ b/tests/common/storages/utils.py @@ -136,7 +136,7 @@ def assert_package_info( if package_state == "normalized": assert package_info.completed_at is None else: - assert (pendulum.now() - package_info.completed_at).seconds < 2 + assert (pendulum.now().diff(package_info.completed_at).seconds) < 2 # get dict package_info.asdict() return package_info From 7985f9d9104840aa0c6228ee3c612e629c7ae7a5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 03:18:04 +0100 Subject: [PATCH 007/105] bumps airflow, yanks duckdb to 0.9.2 --- poetry.lock | 361 ++++++++++++++++++++++++------------------------- pyproject.toml | 8 +- 2 files changed, 182 insertions(+), 187 deletions(-) diff --git a/poetry.lock b/poetry.lock index 62f7a4892e..b25524cfb8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -58,24 +58,24 @@ test = ["PyICU (>=2.4.2)", "coverage (>=3.7.1)", "cssselect (>=0.9.1)", "lxml (> [[package]] name = "aiobotocore" -version = "2.5.2" +version = "2.11.2" description = "Async client for aws services using botocore and aiohttp" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, - {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, + {file = "aiobotocore-2.11.2-py3-none-any.whl", hash = "sha256:487fede588040bfa3a43df945275c28c1c73ca75bf705295adb9fbadd2e89be7"}, + {file = "aiobotocore-2.11.2.tar.gz", hash = "sha256:6dd7352248e3523019c5a54a395d2b1c31080697fc80a9ad2672de4eec8c7abd"}, ] [package.dependencies] -aiohttp = ">=3.3.1,<4.0.0" +aiohttp = ">=3.7.4.post0,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.29.161,<1.29.162" +botocore = ">=1.33.2,<1.34.35" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.27.161,<1.27.162)"] -boto3 = ["boto3 (>=1.26.161,<1.26.162)"] +awscli = ["awscli (>=1.31.2,<1.32.35)"] +boto3 = ["boto3 (>=1.33.2,<1.34.35)"] [[package]] name = "aiohttp" @@ -297,17 +297,18 @@ trio = ["trio (>=0.22)"] [[package]] name = "apache-airflow" -version = "2.7.2" +version = "2.8.1" description = "Programmatically author, 
schedule and monitor data pipelines" optional = false -python-versions = "~=3.8" +python-versions = "<3.12,~=3.8" files = [ - {file = "apache-airflow-2.7.2.tar.gz", hash = "sha256:c6fab3449066867d9a7728f40b6b9e27f1ea68bca39b064a27f5c5ddc3262224"}, - {file = "apache_airflow-2.7.2-py3-none-any.whl", hash = "sha256:1bc2c022bcae24b911e49fafd5fb619b49efba87ed7bc8561a2065810d8fe899"}, + {file = "apache_airflow-2.8.1-py3-none-any.whl", hash = "sha256:8178b3fd22a8766beb2e2972352f37402994a2ea4356106a6763e05807efaa88"}, + {file = "apache_airflow-2.8.1.tar.gz", hash = "sha256:7443d82b790886c5ec137a8fdb94d672e33e81336713ca7320b4a1bbad443a9c"}, ] [package.dependencies] alembic = ">=1.6.3,<2.0" +apache-airflow-providers-common-io = "*" apache-airflow-providers-common-sql = "*" apache-airflow-providers-ftp = "*" apache-airflow-providers-http = "*" @@ -317,23 +318,22 @@ argcomplete = ">=1.10" asgiref = "*" attrs = ">=22.1.0" blinker = "*" -cattrs = ">=22.1.0" colorlog = ">=4.0.2,<5.0" configupdater = ">=3.1.1" -connexion = {version = ">=2.10.0", extras = ["flask"]} +connexion = {version = ">=2.10.0,<3.0", extras = ["flask"]} cron-descriptor = ">=1.2.24" croniter = ">=0.3.17" cryptography = ">=0.9.3" deprecated = ">=1.2.13" dill = ">=0.2.2" flask = ">=2.2,<2.3" -flask-appbuilder = "4.3.6" +flask-appbuilder = "4.3.10" flask-caching = ">=1.5.0" flask-login = ">=0.6.2" flask-session = ">=0.4.0" flask-wtf = ">=0.15" +fsspec = ">=2023.10.0" google-re2 = ">=1.0" -graphviz = ">=0.12" gunicorn = ">=20.1.0" httpx = "*" importlib-metadata = {version = ">=1.7", markers = "python_version < \"3.9\""} @@ -353,10 +353,10 @@ opentelemetry-api = ">=1.15.0" opentelemetry-exporter-otlp = "*" packaging = ">=14.0" pathspec = ">=0.9.0" -pendulum = ">=2.0" +pendulum = ">=2.1.2,<4.0" pluggy = ">=1.0" psutil = ">=4.2.0" -pydantic = ">=1.10.0" +pydantic = ">=2.3.0" pygments = ">=2.0.1" pyjwt = ">=2.0.0" python-daemon = ">=3.0.0" @@ -372,16 +372,17 @@ sqlalchemy-jsonfield = ">=1.0" tabulate = ">=0.7.5" tenacity = ">=6.2.0,<8.2.0 || >8.2.0" termcolor = ">=1.1.0" -typing-extensions = ">=4.0.0" unicodecsv = ">=0.14.1" -werkzeug = ">=2.0" +universal-pathlib = ">=0.1.4" +werkzeug = ">=2.0,<3" [package.extras] -aiobotocore = ["aiobotocore (>=2.1.1)"] +aiobotocore = ["aiobotocore (>=2.7.0)"] airbyte = ["apache-airflow-providers-airbyte"] alibaba = ["apache-airflow-providers-alibaba"] -all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", 
"apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "bcrypt (>=2.0.0)", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy (>=0.2.2)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "elasticsearch (>8,<9)", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth 
(>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "plyvel", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-toolbelt", "scrapbook[all]", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "zenpy (>=2.0.24)"] 
-all-dbs = ["aiohttp (>=3.6.3,<4)", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-arangodb", "apache-airflow-providers-cloudant", "apache-airflow-providers-common-sql (>=1.3.1)", "apache-airflow-providers-common-sql (>=1.5.0)", "apache-airflow-providers-databricks", "apache-airflow-providers-exasol", "apache-airflow-providers-influxdb", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "cassandra-driver (>=3.13.0)", "cloudant (>=2.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "dnspython (>=1.13.0)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "pandas (>=0.17.1)", "pinotdb (>0.4.7)", "presto-python-client (>=0.8.2)", "psycopg2-binary (>=2.8.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pyhive[hive-pure-sasl] (>=0.7.0)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "python-arango (>=7.3.2)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "sqlalchemy-drill (>=1.1.0)", "thrift (>=0.9.2)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)"] +all = ["apache-airflow[aiobotocore]", "apache-airflow[airbyte]", "apache-airflow[alibaba]", "apache-airflow[all-core]", "apache-airflow[amazon]", "apache-airflow[apache-atlas]", "apache-airflow[apache-beam]", "apache-airflow[apache-cassandra]", "apache-airflow[apache-drill]", "apache-airflow[apache-druid]", "apache-airflow[apache-flink]", "apache-airflow[apache-hdfs]", "apache-airflow[apache-hive]", "apache-airflow[apache-impala]", "apache-airflow[apache-kafka]", "apache-airflow[apache-kylin]", "apache-airflow[apache-livy]", "apache-airflow[apache-pig]", "apache-airflow[apache-pinot]", "apache-airflow[apache-spark]", "apache-airflow[apache-sqoop]", "apache-airflow[apache-webhdfs]", "apache-airflow[apprise]", "apache-airflow[arangodb]", "apache-airflow[asana]", "apache-airflow[async]", "apache-airflow[atlassian-jira]", "apache-airflow[celery]", "apache-airflow[cgroups]", "apache-airflow[cloudant]", "apache-airflow[cncf-kubernetes]", "apache-airflow[cohere]", "apache-airflow[common-io]", "apache-airflow[common-sql]", "apache-airflow[daskexecutor]", "apache-airflow[databricks]", "apache-airflow[datadog]", "apache-airflow[dbt-cloud]", "apache-airflow[deprecated-api]", "apache-airflow[dingding]", "apache-airflow[discord]", "apache-airflow[docker]", "apache-airflow[elasticsearch]", "apache-airflow[exasol]", "apache-airflow[facebook]", "apache-airflow[ftp]", "apache-airflow[github-enterprise]", "apache-airflow[github]", "apache-airflow[google-auth]", "apache-airflow[google]", "apache-airflow[graphviz]", "apache-airflow[grpc]", "apache-airflow[hashicorp]", "apache-airflow[http]", "apache-airflow[imap]", "apache-airflow[influxdb]", "apache-airflow[jdbc]", "apache-airflow[jenkins]", "apache-airflow[kerberos]", "apache-airflow[ldap]", "apache-airflow[leveldb]", "apache-airflow[microsoft-azure]", "apache-airflow[microsoft-mssql]", "apache-airflow[microsoft-psrp]", "apache-airflow[microsoft-winrm]", "apache-airflow[mongo]", 
"apache-airflow[mysql]", "apache-airflow[neo4j]", "apache-airflow[odbc]", "apache-airflow[openai]", "apache-airflow[openfaas]", "apache-airflow[openlineage]", "apache-airflow[opensearch]", "apache-airflow[opsgenie]", "apache-airflow[oracle]", "apache-airflow[otel]", "apache-airflow[pagerduty]", "apache-airflow[pandas]", "apache-airflow[papermill]", "apache-airflow[password]", "apache-airflow[pgvector]", "apache-airflow[pinecone]", "apache-airflow[plexus]", "apache-airflow[postgres]", "apache-airflow[presto]", "apache-airflow[rabbitmq]", "apache-airflow[redis]", "apache-airflow[s3fs]", "apache-airflow[salesforce]", "apache-airflow[samba]", "apache-airflow[saml]", "apache-airflow[segment]", "apache-airflow[sendgrid]", "apache-airflow[sentry]", "apache-airflow[sftp]", "apache-airflow[singularity]", "apache-airflow[slack]", "apache-airflow[smtp]", "apache-airflow[snowflake]", "apache-airflow[sqlite]", "apache-airflow[ssh]", "apache-airflow[statsd]", "apache-airflow[tableau]", "apache-airflow[tabular]", "apache-airflow[telegram]", "apache-airflow[trino]", "apache-airflow[vertica]", "apache-airflow[virtualenv]", "apache-airflow[weaviate]", "apache-airflow[yandex]", "apache-airflow[zendesk]"] +all-core = ["apache-airflow[aiobotocore]", "apache-airflow[apache-atlas]", "apache-airflow[apache-webhdfs]", "apache-airflow[async]", "apache-airflow[cgroups]", "apache-airflow[deprecated-api]", "apache-airflow[github-enterprise]", "apache-airflow[google-auth]", "apache-airflow[graphviz]", "apache-airflow[kerberos]", "apache-airflow[ldap]", "apache-airflow[leveldb]", "apache-airflow[otel]", "apache-airflow[pandas]", "apache-airflow[password]", "apache-airflow[rabbitmq]", "apache-airflow[s3fs]", "apache-airflow[saml]", "apache-airflow[sentry]", "apache-airflow[statsd]", "apache-airflow[virtualenv]"] +all-dbs = ["apache-airflow[apache-cassandra]", "apache-airflow[apache-drill]", "apache-airflow[apache-druid]", "apache-airflow[apache-hdfs]", "apache-airflow[apache-hive]", "apache-airflow[apache-impala]", "apache-airflow[apache-pinot]", "apache-airflow[arangodb]", "apache-airflow[cloudant]", "apache-airflow[databricks]", "apache-airflow[exasol]", "apache-airflow[influxdb]", "apache-airflow[microsoft-mssql]", "apache-airflow[mongo]", "apache-airflow[mysql]", "apache-airflow[neo4j]", "apache-airflow[postgres]", "apache-airflow[presto]", "apache-airflow[trino]", "apache-airflow[vertica]"] amazon = ["apache-airflow-providers-amazon"] apache-atlas = ["atlasclient (>=0.1.2)"] apache-beam = ["apache-airflow-providers-apache-beam"] @@ -390,7 +391,7 @@ apache-drill = ["apache-airflow-providers-apache-drill"] apache-druid = ["apache-airflow-providers-apache-druid"] apache-flink = ["apache-airflow-providers-apache-flink"] apache-hdfs = ["apache-airflow-providers-apache-hdfs"] -apache-hive = ["apache-airflow-providers-apache-hive (>=5.1.0)"] +apache-hive = ["apache-airflow-providers-apache-hive"] apache-impala = ["apache-airflow-providers-apache-impala"] apache-kafka = ["apache-airflow-providers-apache-kafka"] apache-kylin = ["apache-airflow-providers-apache-kylin"] @@ -398,59 +399,53 @@ apache-livy = ["apache-airflow-providers-apache-livy"] apache-pig = ["apache-airflow-providers-apache-pig"] apache-pinot = ["apache-airflow-providers-apache-pinot"] apache-spark = ["apache-airflow-providers-apache-spark"] -apache-sqoop = ["apache-airflow-providers-apache-sqoop"] apache-webhdfs = ["hdfs[avro,dataframe,kerberos] (>=2.0.4)"] apprise = ["apache-airflow-providers-apprise"] arangodb = ["apache-airflow-providers-arangodb"] asana 
= ["apache-airflow-providers-asana"] async = ["eventlet (>=0.33.3)", "gevent (>=0.13)", "greenlet (>=0.4.9)"] -atlas = ["apache-airflow-providers-apache-atlas"] +atlas = ["apache-airflow[apache-atlas]"] atlassian-jira = ["apache-airflow-providers-atlassian-jira"] -aws = ["apache-airflow-providers-amazon"] -azure = ["apache-airflow-providers-microsoft-azure"] -cassandra = ["apache-airflow-providers-apache-cassandra"] -celery = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-celery", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "flower (>=1.0.0)"] +aws = ["apache-airflow[amazon]"] +azure = ["apache-airflow[microsoft-azure]"] +cassandra = ["apache-airflow[apache-cassandra]"] +celery = ["apache-airflow-providers-celery"] cgroups = ["cgroupspy (>=0.2.2)"] cloudant = ["apache-airflow-providers-cloudant"] -cncf-kubernetes = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-cncf-kubernetes", "asgiref (>=3.5.2)", "cryptography (>=2.0.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)"] +cncf-kubernetes = ["apache-airflow-providers-cncf-kubernetes"] +cohere = ["apache-airflow-providers-cohere"] +common-io = ["apache-airflow-providers-common-io"] common-sql = ["apache-airflow-providers-common-sql"] -dask = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-daskexecutor", "cloudpickle (>=1.4.1)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "distributed (>=2.11.1,!=2023.5.0)"] -daskexecutor = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-daskexecutor", "cloudpickle (>=1.4.1)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "distributed (>=2.11.1,!=2023.5.0)"] databricks = ["apache-airflow-providers-databricks"] datadog = ["apache-airflow-providers-datadog"] dbt-cloud = ["apache-airflow-providers-dbt-cloud"] deprecated-api = ["requests (>=2.26.0)"] -devel = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-common-sql", "astroid (>=2.12.3,<3.0)", "aws-xray-sdk", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "ipdb", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "openapi-spec-validator (>=0.2.8)", "pandas (>=0.17.1)", "pipdeptree", "pre-commit", "pyarrow (>=9.0.0)", "pygithub", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "pywinrm", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", 
"types-toml", "wheel", "yamllint"] -devel-all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3,<3.0)", "atlasclient (>=0.1.2)", 
"atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch (>8,<9)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", 
"mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openapi-spec-validator (>=0.2.8)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] -devel-ci = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", 
"apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3,<3.0)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy 
(>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch (>8,<9)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openapi-spec-validator (>=0.2.8)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "pytest", 
"pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] -devel-hadoop = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-common-sql", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "astroid (>=2.12.3,<3.0)", "aws-xray-sdk", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "ipdb", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "openapi-spec-validator (>=0.2.8)", "pandas (>=0.17.1)", "pipdeptree", "pre-commit", "presto-python-client (>=0.8.2)", "pyarrow (>=9.0.0)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "pywinrm", "requests-kerberos (>=0.10.0)", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain 
(>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "wheel", "yamllint"] dingding = ["apache-airflow-providers-dingding"] discord = ["apache-airflow-providers-discord"] -doc = ["astroid (>=2.12.3,<3.0)", "checksumdir", "click (>=8.0,!=8.1.4,!=8.1.5)", "docutils (<0.17.0)", "eralchemy2", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)"] -doc-gen = ["eralchemy2"] docker = ["apache-airflow-providers-docker"] -druid = ["apache-airflow-providers-apache-druid"] +druid = ["pache-airflow[apache-druid]"] elasticsearch = ["apache-airflow-providers-elasticsearch"] exasol = ["apache-airflow-providers-exasol"] facebook = ["apache-airflow-providers-facebook"] ftp = ["apache-airflow-providers-ftp"] -gcp = ["apache-airflow-providers-google"] -gcp-api = ["apache-airflow-providers-google"] +gcp = ["apache-airflow[google]"] +gcp-api = ["apache-airflow[google]"] github = ["apache-airflow-providers-github"] -github-enterprise = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.6)"] +github-enterprise = ["apache-airflow[fab]", "authlib (>=1.0.0)"] google = ["apache-airflow-providers-google"] -google-auth = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.6)"] +google-auth = ["apache-airflow[fab]", "authlib (>=1.0.0)"] +graphviz = ["graphviz (>=0.12)"] grpc = ["apache-airflow-providers-grpc"] hashicorp = ["apache-airflow-providers-hashicorp"] -hdfs = ["apache-airflow-providers-apache-hdfs"] -hive = ["apache-airflow-providers-apache-hive"] +hdfs = ["apache-airflow[apache-hdfs]"] +hive = ["apache-airflow[apache-hive]"] http = ["apache-airflow-providers-http"] imap = ["apache-airflow-providers-imap"] influxdb = ["apache-airflow-providers-influxdb"] jdbc = ["apache-airflow-providers-jdbc"] jenkins = ["apache-airflow-providers-jenkins"] kerberos = ["pykerberos (>=1.1.13)", "requests-kerberos (>=0.10.0)", "thrift-sasl (>=0.2.0)"] -kubernetes = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-cncf-kubernetes", "asgiref (>=3.5.2)", "cryptography (>=2.0.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)"] +kubernetes = ["apache-airflow[cncf-kubernetes]"] ldap = ["ldap3 (>=2.5.1)", "python-ldap"] leveldb = ["plyvel"] microsoft-azure = ["apache-airflow-providers-microsoft-azure"] @@ -458,38 +453,42 @@ microsoft-mssql = ["apache-airflow-providers-microsoft-mssql"] microsoft-psrp = ["apache-airflow-providers-microsoft-psrp"] microsoft-winrm = ["apache-airflow-providers-microsoft-winrm"] mongo = ["apache-airflow-providers-mongo"] -mssql = ["apache-airflow-providers-microsoft-mssql"] +mssql = ["apache-airflow[microsoft-mssql]"] mysql = ["apache-airflow-providers-mysql"] neo4j = ["apache-airflow-providers-neo4j"] odbc = ["apache-airflow-providers-odbc"] +openai = ["apache-airflow-providers-openai"] openfaas = ["apache-airflow-providers-openfaas"] openlineage = ["apache-airflow-providers-openlineage"] +opensearch = 
["apache-airflow-providers-opensearch"] opsgenie = ["apache-airflow-providers-opsgenie"] oracle = ["apache-airflow-providers-oracle"] otel = ["opentelemetry-exporter-prometheus"] pagerduty = ["apache-airflow-providers-pagerduty"] -pandas = ["pandas (>=0.17.1)", "pyarrow (>=9.0.0)"] +pandas = ["pandas (>=1.2.5)"] papermill = ["apache-airflow-providers-papermill"] password = ["bcrypt (>=2.0.0)", "flask-bcrypt (>=0.7.1)"] -pinot = ["apache-airflow-providers-apache-pinot"] -plexus = ["apache-airflow-providers-plexus"] +pgvector = ["apache-airflow-providers-pgvector"] +pinecone = ["apache-airflow-providers-pinecone"] +pinot = ["apache-airflow[apache-pinot]"] postgres = ["apache-airflow-providers-postgres"] presto = ["apache-airflow-providers-presto"] -qds = ["apache-airflow-providers-qubole"] rabbitmq = ["amqp"] redis = ["apache-airflow-providers-redis"] -s3 = ["apache-airflow-providers-amazon"] +s3 = ["apache-airflow[amazon]"] +s3fs = ["s3fs (>=2023.10.0)"] salesforce = ["apache-airflow-providers-salesforce"] samba = ["apache-airflow-providers-samba"] +saml = ["python3-saml (>=1.16.0)"] segment = ["apache-airflow-providers-segment"] sendgrid = ["apache-airflow-providers-sendgrid"] -sentry = ["blinker (>=1.1)", "sentry-sdk (>=0.8.0)"] +sentry = ["blinker (>=1.1)", "sentry-sdk (>=1.32.0,!=1.33.0)"] sftp = ["apache-airflow-providers-sftp"] singularity = ["apache-airflow-providers-singularity"] slack = ["apache-airflow-providers-slack"] smtp = ["apache-airflow-providers-smtp"] snowflake = ["apache-airflow-providers-snowflake"] -spark = ["apache-airflow-providers-apache-spark"] +spark = ["apache-airflow[apache-spark]"] sqlite = ["apache-airflow-providers-sqlite"] ssh = ["apache-airflow-providers-ssh"] statsd = ["statsd (>=3.3.0)"] @@ -499,10 +498,29 @@ telegram = ["apache-airflow-providers-telegram"] trino = ["apache-airflow-providers-trino"] vertica = ["apache-airflow-providers-vertica"] virtualenv = ["virtualenv"] -webhdfs = ["hdfs[avro,dataframe,kerberos] (>=2.0.4)"] -winrm = ["apache-airflow-providers-microsoft-winrm"] +weaviate = ["apache-airflow-providers-weaviate"] +webhdfs = ["apache-airflow[apache-webhdfs]"] +winrm = ["apache-airflow[microsoft-winrm]"] +yandex = ["apache-airflow-providers-yandex"] zendesk = ["apache-airflow-providers-zendesk"] +[[package]] +name = "apache-airflow-providers-common-io" +version = "1.3.0" +description = "Provider package apache-airflow-providers-common-io for Apache Airflow" +optional = false +python-versions = "~=3.8" +files = [ + {file = "apache_airflow_providers_common_io-1.3.0-py3-none-any.whl", hash = "sha256:a67c6dd3cb419c68fc1a9ed62f0f434426852e15a46c3159f367b3961332955d"}, + {file = "apache_airflow_providers_common_io-1.3.0.tar.gz", hash = "sha256:7172620a2370031970df2212a9f694a5ff82240f7e498b8b7dfdbae7e6c882d6"}, +] + +[package.dependencies] +apache-airflow = ">=2.8.0" + +[package.extras] +openlineage = ["apache-airflow-providers-openlineage"] + [[package]] name = "apache-airflow-providers-common-sql" version = "1.7.1" @@ -997,19 +1015,19 @@ files = [ [[package]] name = "boto3" -version = "1.26.161" +version = "1.34.34" description = "The AWS SDK for Python" optional = true -python-versions = ">= 3.7" +python-versions = ">= 3.8" files = [ - {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, - {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, + {file = "boto3-1.34.34-py3-none-any.whl", hash = 
"sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa"}, + {file = "boto3-1.34.34.tar.gz", hash = "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1"}, ] [package.dependencies] -botocore = ">=1.29.161,<1.30.0" +botocore = ">=1.34.34,<1.35.0" jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.6.0,<0.7.0" +s3transfer = ">=0.10.0,<0.11.0" [package.extras] crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] @@ -1396,22 +1414,25 @@ xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] [[package]] name = "botocore" -version = "1.29.161" +version = "1.34.34" description = "Low-level, data-driven core of boto 3." optional = true -python-versions = ">= 3.7" +python-versions = ">= 3.8" files = [ - {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, - {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, + {file = "botocore-1.34.34-py3-none-any.whl", hash = "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b"}, + {file = "botocore-1.34.34.tar.gz", hash = "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5"}, ] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" -urllib3 = ">=1.25.4,<1.27" +urllib3 = [ + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}, +] [package.extras] -crt = ["awscrt (==0.16.9)"] +crt = ["awscrt (==0.19.19)"] [[package]] name = "botocore-stubs" @@ -1450,31 +1471,6 @@ files = [ {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, ] -[[package]] -name = "cattrs" -version = "23.1.2" -description = "Composable complex class support for attrs and dataclasses." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, - {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, -] - -[package.dependencies] -attrs = ">=20" -exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} -typing_extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} - -[package.extras] -bson = ["pymongo (>=4.2.0,<5.0.0)"] -cbor2 = ["cbor2 (>=5.4.6,<6.0.0)"] -msgpack = ["msgpack (>=1.0.2,<2.0.0)"] -orjson = ["orjson (>=3.5.2,<4.0.0)"] -pyyaml = ["PyYAML (>=6.0,<7.0)"] -tomlkit = ["tomlkit (>=0.11.4,<0.12.0)"] -ujson = ["ujson (>=5.4.0,<6.0.0)"] - [[package]] name = "certifi" version = "2023.7.22" @@ -2339,58 +2335,50 @@ dates = ["pytz (>=2019.1)"] [[package]] name = "duckdb" -version = "0.10.0" -description = "DuckDB in-process database" +version = "0.9.2" +description = "DuckDB embedded database" optional = false python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bd0ffb3fddef0f72a150e4d76e10942a84a1a0447d10907df1621b90d6668060"}, - {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3d709d5c7c1a12b5e10d0b05fa916c670cd2b50178e3696faa0cc16048a1745"}, - {file = "duckdb-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9114aa22ec5d591a20ce5184be90f49d8e5b5348ceaab21e102c54560d07a5f8"}, - {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a37877efadf39caf7cadde0f430fedf762751b9c54750c821e2f1316705a21"}, - {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87cbc9e1d9c3fc9f14307bea757f99f15f46843c0ab13a6061354410824ed41f"}, - {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0bfec79fed387201550517d325dff4fad2705020bc139d936cab08b9e845662"}, - {file = "duckdb-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5622134d2d9796b15e09de810e450859d4beb46d9b861357ec9ae40a61b775c"}, - {file = "duckdb-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:089ee8e831ccaef1b73fc89c43b661567175eed0115454880bafed5e35cda702"}, - {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a05af63747f1d7021995f0811c333dee7316cec3b06c0d3e4741b9bdb678dd21"}, - {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:072d6eba5d8a59e0069a8b5b4252fed8a21f9fe3f85a9129d186a39b3d0aea03"}, - {file = "duckdb-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a77b85668f59b919042832e4659538337f1c7f197123076c5311f1c9cf077df7"}, - {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96a666f1d2da65d03199a977aec246920920a5ea1da76b70ae02bd4fb1ffc48c"}, - {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ec76a4262b783628d26612d184834852d9c92fb203e91af789100c17e3d7173"}, - {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009dd9d2cdbd3b061a9efbdfc79f2d1a8377bcf49f1e5f430138621f8c083a6c"}, - {file = "duckdb-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:878f06766088090dad4a2e5ee0081555242b2e8dcb29415ecc97e388cf0cf8d8"}, - {file = "duckdb-0.10.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:713ff0a1fb63a6d60f454acf67f31656549fb5d63f21ac68314e4f522daa1a89"}, - {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9c0ee450dfedfb52dd4957244e31820feef17228da31af6d052979450a80fd19"}, - {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ff79b2ea9994398b545c0d10601cd73565fbd09f8951b3d8003c7c5c0cebc7cb"}, - {file = "duckdb-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bdf1aa71b924ef651062e6b8ff9981ad85bec89598294af8a072062c5717340"}, - {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0265bbc8216be3ced7b377ba8847128a3fc0ef99798a3c4557c1b88e3a01c23"}, - {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d418a315a07707a693bd985274c0f8c4dd77015d9ef5d8d3da4cc1942fd82e0"}, - {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2828475a292e68c71855190b818aded6bce7328f79e38c04a0c75f8f1c0ceef0"}, - {file = "duckdb-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c3aaeaae2eba97035c65f31ffdb18202c951337bf2b3d53d77ce1da8ae2ecf51"}, - {file = "duckdb-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c51790aaaea97d8e4a58a114c371ed8d2c4e1ca7cbf29e3bdab6d8ccfc5afc1e"}, - {file = "duckdb-0.10.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8af1ae7cc77a12206b6c47ade191882cc8f49f750bb3e72bb86ac1d4fa89926a"}, - {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4f7e8e8dc0e376aeb280b83f2584d0e25ec38985c27d19f3107b2edc4f4a97"}, - {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae942a79fad913defa912b56483cd7827a4e7721f4ce4bc9025b746ecb3c89"}, - {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01b57802898091455ca2a32c1335aac1e398da77c99e8a96a1e5de09f6a0add9"}, - {file = "duckdb-0.10.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:52e1ad4a55fa153d320c367046b9500578192e01c6d04308ba8b540441736f2c"}, - {file = "duckdb-0.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:904c47d04095af745e989c853f0bfc0776913dfc40dfbd2da7afdbbb5f67fed0"}, - {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:184ae7ea5874f3b8fa51ab0f1519bdd088a0b78c32080ee272b1d137e2c8fd9c"}, - {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd33982ecc9bac727a032d6cedced9f19033cbad56647147408891eb51a6cb37"}, - {file = "duckdb-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f59bf0949899105dd5f8864cb48139bfb78454a8c017b8258ba2b5e90acf7afc"}, - {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:395f3b18948001e35dceb48a4423d574e38656606d033eef375408b539e7b076"}, - {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b8eb2b803be7ee1df70435c33b03a4598cdaf676cd67ad782b288dcff65d781"}, - {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31b2ddd331801064326c8e3587a4db8a31d02aef11332c168f45b3bd92effb41"}, - {file = "duckdb-0.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c8b89e76a041424b8c2026c5dc1f74b53fbbc6c6f650d563259885ab2e7d093d"}, - {file = "duckdb-0.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:79084a82f16c0a54f6bfb7ded5600400c2daa90eb0d83337d81a56924eaee5d4"}, - {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:79799b3a270dcd9070f677ba510f1e66b112df3068425691bac97c5e278929c7"}, - {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8fc394bfe3434920cdbcfbdd0ac3ba40902faa1dbda088db0ba44003a45318a"}, - {file = "duckdb-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c116605551b4abf5786243a59bcef02bd69cc51837d0c57cafaa68cdc428aa0c"}, - {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3191170c3b0a43b0c12644800326f5afdea00d5a4621d59dbbd0c1059139e140"}, - {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fee69a50eb93c72dc77e7ab1fabe0c38d21a52c5da44a86aa217081e38f9f1bd"}, - {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5f449e87dacb16b0d145dbe65fa6fdb5a55b2b6911a46d74876e445dd395bac"}, - {file = "duckdb-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4487d0df221b17ea4177ad08131bc606b35f25cfadf890987833055b9d10cdf6"}, - {file = "duckdb-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:c099ae2ff8fe939fda62da81704f91e2f92ac45e48dc0e37c679c9d243d01e65"}, - {file = "duckdb-0.10.0.tar.gz", hash = "sha256:c02bcc128002aa79e3c9d89b9de25e062d1096a8793bc0d7932317b7977f6845"}, + {file = "duckdb-0.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:aadcea5160c586704c03a8a796c06a8afffbefefb1986601104a60cb0bfdb5ab"}, + {file = "duckdb-0.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:08215f17147ed83cbec972175d9882387366de2ed36c21cbe4add04b39a5bcb4"}, + {file = "duckdb-0.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee6c2a8aba6850abef5e1be9dbc04b8e72a5b2c2b67f77892317a21fae868fe7"}, + {file = "duckdb-0.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff49f3da9399900fd58b5acd0bb8bfad22c5147584ad2427a78d937e11ec9d0"}, + {file = "duckdb-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5ac5baf8597efd2bfa75f984654afcabcd698342d59b0e265a0bc6f267b3f0"}, + {file = "duckdb-0.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:81c6df905589a1023a27e9712edb5b724566587ef280a0c66a7ec07c8083623b"}, + {file = "duckdb-0.9.2-cp310-cp310-win32.whl", hash = "sha256:a298cd1d821c81d0dec8a60878c4b38c1adea04a9675fb6306c8f9083bbf314d"}, + {file = "duckdb-0.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:492a69cd60b6cb4f671b51893884cdc5efc4c3b2eb76057a007d2a2295427173"}, + {file = "duckdb-0.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:061a9ea809811d6e3025c5de31bc40e0302cfb08c08feefa574a6491e882e7e8"}, + {file = "duckdb-0.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a43f93be768af39f604b7b9b48891f9177c9282a408051209101ff80f7450d8f"}, + {file = "duckdb-0.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac29c8c8f56fff5a681f7bf61711ccb9325c5329e64f23cb7ff31781d7b50773"}, + {file = "duckdb-0.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b14d98d26bab139114f62ade81350a5342f60a168d94b27ed2c706838f949eda"}, + {file = "duckdb-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:796a995299878913e765b28cc2b14c8e44fae2f54ab41a9ee668c18449f5f833"}, + {file = "duckdb-0.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6cb64ccfb72c11ec9c41b3cb6181b6fd33deccceda530e94e1c362af5f810ba1"}, + {file = "duckdb-0.9.2-cp311-cp311-win32.whl", hash = "sha256:930740cb7b2cd9e79946e1d3a8f66e15dc5849d4eaeff75c8788d0983b9256a5"}, + {file = "duckdb-0.9.2-cp311-cp311-win_amd64.whl", hash 
= "sha256:c28f13c45006fd525001b2011cdf91fa216530e9751779651e66edc0e446be50"}, + {file = "duckdb-0.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fbce7bbcb4ba7d99fcec84cec08db40bc0dd9342c6c11930ce708817741faeeb"}, + {file = "duckdb-0.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15a82109a9e69b1891f0999749f9e3265f550032470f51432f944a37cfdc908b"}, + {file = "duckdb-0.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9490fb9a35eb74af40db5569d90df8a04a6f09ed9a8c9caa024998c40e2506aa"}, + {file = "duckdb-0.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:696d5c6dee86c1a491ea15b74aafe34ad2b62dcd46ad7e03b1d00111ca1a8c68"}, + {file = "duckdb-0.9.2-cp37-cp37m-win32.whl", hash = "sha256:4f0935300bdf8b7631ddfc838f36a858c1323696d8c8a2cecbd416bddf6b0631"}, + {file = "duckdb-0.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0aab900f7510e4d2613263865570203ddfa2631858c7eb8cbed091af6ceb597f"}, + {file = "duckdb-0.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7d8130ed6a0c9421b135d0743705ea95b9a745852977717504e45722c112bf7a"}, + {file = "duckdb-0.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:974e5de0294f88a1a837378f1f83330395801e9246f4e88ed3bfc8ada65dcbee"}, + {file = "duckdb-0.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4fbc297b602ef17e579bb3190c94d19c5002422b55814421a0fc11299c0c1100"}, + {file = "duckdb-0.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1dd58a0d84a424924a35b3772419f8cd78a01c626be3147e4934d7a035a8ad68"}, + {file = "duckdb-0.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11a1194a582c80dfb57565daa06141727e415ff5d17e022dc5f31888a5423d33"}, + {file = "duckdb-0.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:be45d08541002a9338e568dca67ab4f20c0277f8f58a73dfc1435c5b4297c996"}, + {file = "duckdb-0.9.2-cp38-cp38-win32.whl", hash = "sha256:dd6f88aeb7fc0bfecaca633629ff5c986ac966fe3b7dcec0b2c48632fd550ba2"}, + {file = "duckdb-0.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:28100c4a6a04e69aa0f4a6670a6d3d67a65f0337246a0c1a429f3f28f3c40b9a"}, + {file = "duckdb-0.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ae5bf0b6ad4278e46e933e51473b86b4b932dbc54ff097610e5b482dd125552"}, + {file = "duckdb-0.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5d0bb845a80aa48ed1fd1d2d285dd352e96dc97f8efced2a7429437ccd1fe1f"}, + {file = "duckdb-0.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ce262d74a52500d10888110dfd6715989926ec936918c232dcbaddb78fc55b4"}, + {file = "duckdb-0.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6935240da090a7f7d2666f6d0a5e45ff85715244171ca4e6576060a7f4a1200e"}, + {file = "duckdb-0.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5cfb93e73911696a98b9479299d19cfbc21dd05bb7ab11a923a903f86b4d06e"}, + {file = "duckdb-0.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:64e3bc01751f31e7572d2716c3e8da8fe785f1cdc5be329100818d223002213f"}, + {file = "duckdb-0.9.2-cp39-cp39-win32.whl", hash = "sha256:6e5b80f46487636368e31b61461940e3999986359a78660a50dfdd17dd72017c"}, + {file = "duckdb-0.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:e6142a220180dbeea4f341708bd5f9501c5c962ce7ef47c1cadf5e8810b4cb13"}, + {file = "duckdb-0.9.2.tar.gz", hash = "sha256:3843afeab7c3fc4a4c0b53686a4cc1d9cdbdadcbb468d60fef910355ecafd447"}, ] [[package]] @@ -2608,13 +2596,13 @@ dotenv = ["python-dotenv"] [[package]] name = "flask-appbuilder" -version = "4.3.6" +version = 
"4.3.10" description = "Simple and rapid application development framework, built on top of Flask. includes detailed security, auto CRUD generation for your models, google charts and much more." optional = false python-versions = "~=3.7" files = [ - {file = "Flask-AppBuilder-4.3.6.tar.gz", hash = "sha256:8ca9710fa7d2704747d195e11b487d45a571f40559d8399d9d5dfa42ea1f3c78"}, - {file = "Flask_AppBuilder-4.3.6-py3-none-any.whl", hash = "sha256:840480dfd43134bebf78f3c7dc909e324c2689d2d9f27aeb1880a8a25466bc8d"}, + {file = "Flask-AppBuilder-4.3.10.tar.gz", hash = "sha256:4173c878e56b81c6acac5e3c80c133f4183f43442fd944552bd9f4023f5baceb"}, + {file = "Flask_AppBuilder-4.3.10-py3-none-any.whl", hash = "sha256:c0af506e1a68e7ee14f26a16fda829f1a14f8343654c30bdbb1351d23c545df9"}, ] [package.dependencies] @@ -2622,7 +2610,7 @@ apispec = {version = ">=6.0.0,<7", extras = ["yaml"]} click = ">=8,<9" colorama = ">=0.3.9,<1" email-validator = ">=1.0.5,<2" -Flask = ">=2,<3" +Flask = ">=2,<2.3.0" Flask-Babel = ">=1,<3" Flask-JWT-Extended = ">=4.0.0,<5.0.0" Flask-Limiter = ">3,<4" @@ -2637,6 +2625,7 @@ PyJWT = ">=2.0.0,<3.0.0" python-dateutil = ">=2.3,<3" SQLAlchemy = "<1.5" sqlalchemy-utils = ">=0.32.21,<1" +werkzeug = "<3" WTForms = "<4" [package.extras] @@ -2869,13 +2858,13 @@ files = [ [[package]] name = "fsspec" -version = "2023.6.0" +version = "2024.2.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, - {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, + {file = "fsspec-2024.2.0-py3-none-any.whl", hash = "sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"}, + {file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"}, ] [package.extras] @@ -2893,7 +2882,7 @@ github = ["requests"] gs = ["gcsfs"] gui = ["panel"] hdfs = ["pyarrow (>=1)"] -http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] libarchive = ["libarchive-c"] oci = ["ocifs"] s3 = ["s3fs"] @@ -2914,19 +2903,19 @@ files = [ [[package]] name = "gcsfs" -version = "2023.6.0" +version = "2024.2.0" description = "Convenient Filesystem interface over GCS" optional = true python-versions = ">=3.8" files = [ - {file = "gcsfs-2023.6.0-py2.py3-none-any.whl", hash = "sha256:3b3c7d8eddd4ec1380f3b49fbb861ee1e974adb223564401f10884b6260d406f"}, - {file = "gcsfs-2023.6.0.tar.gz", hash = "sha256:30b14fccadb3b7f0d99b2cd03bd8507c40f3a9a7d05847edca571f642bedbdff"}, + {file = "gcsfs-2024.2.0-py2.py3-none-any.whl", hash = "sha256:20bf70cc81d580474dd299d55e1ffcf8b3e81721aeb562e148ca0a3c900d0421"}, + {file = "gcsfs-2024.2.0.tar.gz", hash = "sha256:f7cffd7cae2fb50c56ef883f8aef9792be045b5059f06c1902c3a6151509f506"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2023.6.0" +fsspec = "2024.2.0" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -3521,22 +3510,6 @@ files = [ [package.extras] test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"] -[[package]] -name = "graphviz" -version = "0.20.1" -description = "Simple Python interface for Graphviz" -optional = false -python-versions = ">=3.7" -files = [ - {file = "graphviz-0.20.1-py3-none-any.whl", hash = 
"sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, - {file = "graphviz-0.20.1.zip", hash = "sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, -] - -[package.extras] -dev = ["flake8", "pep8-naming", "tox (>=3)", "twine", "wheel"] -docs = ["sphinx (>=5)", "sphinx-autodoc-typehints", "sphinx-rtd-theme"] -test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>=3)"] - [[package]] name = "greenlet" version = "3.0.3" @@ -7469,40 +7442,40 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3fs" -version = "2023.6.0" +version = "2024.2.0" description = "Convenient Filesystem interface over S3" optional = true python-versions = ">= 3.8" files = [ - {file = "s3fs-2023.6.0-py3-none-any.whl", hash = "sha256:d1a0a423d0d2e17fb2a193d9531935dc3f45ba742693448a461b6b34f6a92a24"}, - {file = "s3fs-2023.6.0.tar.gz", hash = "sha256:63fd8ddf05eb722de784b7b503196107f2a518061298cf005a8a4715b4d49117"}, + {file = "s3fs-2024.2.0-py3-none-any.whl", hash = "sha256:c140de37175c157cb662aa6ad7423365df732ac5f10ef5bf7b76078c6333a942"}, + {file = "s3fs-2024.2.0.tar.gz", hash = "sha256:f8064f522ad088b56b043047c825734847c0269df19f2613c956d4c20de15b62"}, ] [package.dependencies] -aiobotocore = ">=2.5.0,<2.6.0" +aiobotocore = ">=2.5.4,<3.0.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2023.6.0" +fsspec = "2024.2.0" [package.extras] -awscli = ["aiobotocore[awscli] (>=2.5.0,<2.6.0)"] -boto3 = ["aiobotocore[boto3] (>=2.5.0,<2.6.0)"] +awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] +boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] [[package]] name = "s3transfer" -version = "0.6.2" +version = "0.10.0" description = "An Amazon S3 Transfer Manager" optional = true -python-versions = ">= 3.7" +python-versions = ">= 3.8" files = [ - {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, - {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, + {file = "s3transfer-0.10.0-py3-none-any.whl", hash = "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e"}, + {file = "s3transfer-0.10.0.tar.gz", hash = "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b"}, ] [package.dependencies] -botocore = ">=1.12.36,<2.0a.0" +botocore = ">=1.33.2,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "scramp" @@ -8536,6 +8509,24 @@ files = [ {file = "unicodecsv-0.14.1.tar.gz", hash = "sha256:018c08037d48649a0412063ff4eda26eaa81eff1546dbffa51fa5293276ff7fc"}, ] +[[package]] +name = "universal-pathlib" +version = "0.2.1" +description = "pathlib api extended to use fsspec backends" +optional = false +python-versions = ">=3.8" +files = [ + {file = "universal_pathlib-0.2.1-py3-none-any.whl", hash = "sha256:bb14881f1c6c025c654a658c253b4cf89e8238dff6d3c847aa5723899227f85e"}, + {file = "universal_pathlib-0.2.1.tar.gz", hash = "sha256:fda2f484d875c26079771f94acfef58647eed80efce75f0bf8824373b432e802"}, +] + +[package.dependencies] +fsspec = ">=2022.1.0" + +[package.extras] +dev = ["adlfs", "aiohttp", "cheroot", "gcsfs", "moto[s3,server] (<5)", "mypy (==1.8.0)", "packaging", "pydantic", "pydantic-settings", "pylint (==2.17.4)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.12.0)", "pytest-sugar (==0.9.7)", "requests", "s3fs", "webdav4[fsspec]", "wsgidav"] +tests = ["mypy (==1.8.0)", "packaging", "pylint (==2.17.4)", 
"pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.12.0)", "pytest-sugar (==0.9.7)"] + [[package]] name = "uritemplate" version = "4.1.1" @@ -8939,4 +8930,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "4c9aa9a9d0059390f52f96bb8b5cf20a555ed54e4d7b8ac8bcb1ffefd85808f5" +content-hash = "ab48e59bcde893f0417f260980e794c5c091a66e409842745a58da47807e3dac" diff --git a/pyproject.toml b/pyproject.toml index 440df139dd..ce52f8b519 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_i grpcio = {version = ">=1.50.0", optional = true} google-cloud-bigquery = {version = ">=2.26.0", optional = true} pyarrow = {version = ">=12.0.0", optional = true} -duckdb = {version = ">=0.6.1,<0.11.0", optional = true} +duckdb = {version = ">=0.6.1,<0.10.0", optional = true} dbt-core = {version = ">=1.2.0", optional = true} dbt-redshift = {version = ">=1.2.0", optional = true} dbt-bigquery = {version = ">=1.2.0", optional = true} @@ -152,6 +152,10 @@ tqdm = "^4.65.0" enlighten = "^1.11.2" alive-progress = "^3.1.1" pydantic = ">2" +numpy = [ + { version = ">=1.21", python = ">=3.8,<3.12" }, + { version = ">=1.26", python = ">=3.12" } +] pandas = [ {version = ">2.1", markers = "python_version >= '3.9'"}, {version = "<2.1", markers = "python_version < '3.9'"} @@ -161,7 +165,7 @@ pandas = [ optional = true [tool.poetry.group.airflow.dependencies] -apache-airflow = "^2.5.3" +apache-airflow = {version = "^2.8.0", markers = "python_version < '3.12'"} [tool.poetry.group.providers] optional = true From 07f285e0a2ef77c82eeb400c7efbcdfea5bc0470 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 03:21:10 +0100 Subject: [PATCH 008/105] fixes tests --- dlt/destinations/impl/weaviate/weaviate_client.py | 6 +++--- tests/helpers/dbt_tests/test_runner_dbt_versions.py | 4 ++-- tests/pipeline/test_dlt_versions.py | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index eb096d0a26..2d23dc38f7 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -200,7 +200,7 @@ def check_batch_result(results: List[StrAny]) -> None: data[key] = json.dumps(data[key]) for key in self.date_indices: if key in data: - data[key] = str(ensure_pendulum_datetime(data[key])) + data[key] = ensure_pendulum_datetime(data[key]).isoformat() if self.unique_identifiers: uuid = self.generate_uuid(data, self.unique_identifiers, self.class_name) else: @@ -664,7 +664,7 @@ def complete_load(self, load_id: str) -> None: "load_id": load_id, "schema_name": self.schema.name, "status": 0, - "inserted_at": str(pendulum.now()), + "inserted_at": pendulum.now().isoformat(), } self.create_object(properties, self.schema.loads_table_name) @@ -686,7 +686,7 @@ def _update_schema_in_storage(self, schema: Schema) -> None: "schema_name": schema.name, "version": schema.version, "engine_version": schema.ENGINE_VERSION, - "inserted_at": str(pendulum.now()), + "inserted_at": pendulum.now().isoformat(), "schema": schema_str, } self.create_object(properties, self.schema.version_table_name) diff --git a/tests/helpers/dbt_tests/test_runner_dbt_versions.py b/tests/helpers/dbt_tests/test_runner_dbt_versions.py index a47828a9ea..a7408f00f3 100644 --- a/tests/helpers/dbt_tests/test_runner_dbt_versions.py +++ 
b/tests/helpers/dbt_tests/test_runner_dbt_versions.py @@ -80,10 +80,10 @@ def test_infer_venv_deps() -> None: # provide version ranges requirements = _create_dbt_deps(["duckdb"], dbt_version=">3") # special duckdb dependency - assert requirements[:-1] == ["dbt-core>3", "dbt-duckdb", "duckdb==0.9.1"] + assert requirements[:-1] == ["dbt-core>3", "dbt-duckdb", "duckdb==0.9.2"] # we do not validate version ranges, pip will do it and fail when creating venv requirements = _create_dbt_deps(["motherduck"], dbt_version="y") - assert requirements[:-1] == ["dbt-corey", "dbt-duckdb", "duckdb==0.9.1"] + assert requirements[:-1] == ["dbt-corey", "dbt-duckdb", "duckdb==0.9.2"] def test_default_profile_name() -> None: diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 5cf1857dfa..fdd05beb7d 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -1,3 +1,4 @@ +import sys import pytest import tempfile import shutil @@ -22,6 +23,9 @@ from tests.utils import TEST_STORAGE_ROOT, test_storage +if sys.version_info > (3, 11): + pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) + GITHUB_PIPELINE_NAME = "dlt_github_pipeline" GITHUB_DATASET = "github_3" From 06e441e4a73210643d1a13ed2ee21fc0f01271bb Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 21 Feb 2024 03:26:41 +0100 Subject: [PATCH 009/105] fixes pandas version --- poetry.lock | 14 +++++++------- pyproject.toml | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index b25524cfb8..352dcaccf9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5663,7 +5663,11 @@ files = [ ] [package.dependencies] -numpy = {version = ">=1.20.3", markers = "python_version < \"3.10\""} +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] python-dateutil = ">=2.8.2" pytz = ">=2020.1" tzdata = ">=2022.1" @@ -5730,11 +5734,7 @@ files = [ ] [package.dependencies] -numpy = [ - {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, -] +numpy = {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""} python-dateutil = ">=2.8.2" pytz = ">=2020.1" tzdata = ">=2022.7" @@ -8930,4 +8930,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "ab48e59bcde893f0417f260980e794c5c091a66e409842745a58da47807e3dac" +content-hash = "1e14eb51aad8b45d9db6bb685e108761cf7eaa6500af85de494bdf1ccd9c7dde" diff --git a/pyproject.toml b/pyproject.toml index ce52f8b519..5d6ee9d645 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,8 +157,8 @@ numpy = [ { version = ">=1.26", python = ">=3.12" } ] pandas = [ - {version = ">2.1", markers = "python_version >= '3.9'"}, - {version = "<2.1", markers = "python_version < '3.9'"} + {version = ">2.1", markers = "python_version >= '3.12'"}, + {version = "<2.1", markers = "python_version < '3.12'"} ] [tool.poetry.group.airflow] From 3e846a1cf16f2a629a93172802b7f5f66ad10fbc Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Fri, 23 Feb 2024 00:06:10 +0100 Subject: [PATCH 010/105] adds 3.12 duckdb dep --- dlt/extract/decorators.py | 4 +- dlt/extract/utils.py | 2 +- docs/website/docs/reference/installation.md | 2 +- 
poetry.lock | 62 ++++++++++++++++++++- pyproject.toml | 9 ++- 5 files changed, 71 insertions(+), 8 deletions(-) diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 2835d0aafa..af8bb69c42 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -271,10 +271,10 @@ async def _wrap_coro(*args: Any, **kwargs: Any) -> TDltSourceImpl: # get spec for wrapped function SPEC = get_fun_spec(conf_f) # get correct wrapper - wrapper = _wrap_coro if inspect.iscoroutinefunction(f) else _wrap + wrapper = _wrap_coro if inspect.iscoroutinefunction(inspect.unwrap(f)) else _wrap # store the source information _SOURCES[_wrap.__qualname__] = SourceInfo(SPEC, wrapper, func_module) - if inspect.iscoroutinefunction(f): + if inspect.iscoroutinefunction(inspect.unwrap(f)): return _wrap_coro else: return _wrap diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 0e86994eb4..fc27a5c39e 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -198,7 +198,7 @@ def wrap_resource_gen( if ( inspect.isgeneratorfunction(inspect.unwrap(f)) or inspect.isgenerator(f) - or inspect.isasyncgenfunction(f) + or inspect.isasyncgenfunction(inspect.unwrap(f)) ): def _partial() -> Any: diff --git a/docs/website/docs/reference/installation.md b/docs/website/docs/reference/installation.md index 3178b0fd1a..a802c34597 100644 --- a/docs/website/docs/reference/installation.md +++ b/docs/website/docs/reference/installation.md @@ -8,7 +8,7 @@ keywords: [installation, environment, pip install] ## Set up environment -### Make sure you are using **Python 3.8-3.11** and have `pip` installed +### Make sure you are using **Python 3.8-3.12** and have `pip` installed ```bash python --version diff --git a/poetry.lock b/poetry.lock index 352dcaccf9..c78fab989f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2381,6 +2381,62 @@ files = [ {file = "duckdb-0.9.2.tar.gz", hash = "sha256:3843afeab7c3fc4a4c0b53686a4cc1d9cdbdadcbb468d60fef910355ecafd447"}, ] +[[package]] +name = "duckdb" +version = "0.10.0" +description = "DuckDB in-process database" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bd0ffb3fddef0f72a150e4d76e10942a84a1a0447d10907df1621b90d6668060"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3d709d5c7c1a12b5e10d0b05fa916c670cd2b50178e3696faa0cc16048a1745"}, + {file = "duckdb-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9114aa22ec5d591a20ce5184be90f49d8e5b5348ceaab21e102c54560d07a5f8"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a37877efadf39caf7cadde0f430fedf762751b9c54750c821e2f1316705a21"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87cbc9e1d9c3fc9f14307bea757f99f15f46843c0ab13a6061354410824ed41f"}, + {file = "duckdb-0.10.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0bfec79fed387201550517d325dff4fad2705020bc139d936cab08b9e845662"}, + {file = "duckdb-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5622134d2d9796b15e09de810e450859d4beb46d9b861357ec9ae40a61b775c"}, + {file = "duckdb-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:089ee8e831ccaef1b73fc89c43b661567175eed0115454880bafed5e35cda702"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a05af63747f1d7021995f0811c333dee7316cec3b06c0d3e4741b9bdb678dd21"}, + {file = 
"duckdb-0.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:072d6eba5d8a59e0069a8b5b4252fed8a21f9fe3f85a9129d186a39b3d0aea03"}, + {file = "duckdb-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a77b85668f59b919042832e4659538337f1c7f197123076c5311f1c9cf077df7"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96a666f1d2da65d03199a977aec246920920a5ea1da76b70ae02bd4fb1ffc48c"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ec76a4262b783628d26612d184834852d9c92fb203e91af789100c17e3d7173"}, + {file = "duckdb-0.10.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009dd9d2cdbd3b061a9efbdfc79f2d1a8377bcf49f1e5f430138621f8c083a6c"}, + {file = "duckdb-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:878f06766088090dad4a2e5ee0081555242b2e8dcb29415ecc97e388cf0cf8d8"}, + {file = "duckdb-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:713ff0a1fb63a6d60f454acf67f31656549fb5d63f21ac68314e4f522daa1a89"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9c0ee450dfedfb52dd4957244e31820feef17228da31af6d052979450a80fd19"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ff79b2ea9994398b545c0d10601cd73565fbd09f8951b3d8003c7c5c0cebc7cb"}, + {file = "duckdb-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bdf1aa71b924ef651062e6b8ff9981ad85bec89598294af8a072062c5717340"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0265bbc8216be3ced7b377ba8847128a3fc0ef99798a3c4557c1b88e3a01c23"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d418a315a07707a693bd985274c0f8c4dd77015d9ef5d8d3da4cc1942fd82e0"}, + {file = "duckdb-0.10.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2828475a292e68c71855190b818aded6bce7328f79e38c04a0c75f8f1c0ceef0"}, + {file = "duckdb-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c3aaeaae2eba97035c65f31ffdb18202c951337bf2b3d53d77ce1da8ae2ecf51"}, + {file = "duckdb-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c51790aaaea97d8e4a58a114c371ed8d2c4e1ca7cbf29e3bdab6d8ccfc5afc1e"}, + {file = "duckdb-0.10.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8af1ae7cc77a12206b6c47ade191882cc8f49f750bb3e72bb86ac1d4fa89926a"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa4f7e8e8dc0e376aeb280b83f2584d0e25ec38985c27d19f3107b2edc4f4a97"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae942a79fad913defa912b56483cd7827a4e7721f4ce4bc9025b746ecb3c89"}, + {file = "duckdb-0.10.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01b57802898091455ca2a32c1335aac1e398da77c99e8a96a1e5de09f6a0add9"}, + {file = "duckdb-0.10.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:52e1ad4a55fa153d320c367046b9500578192e01c6d04308ba8b540441736f2c"}, + {file = "duckdb-0.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:904c47d04095af745e989c853f0bfc0776913dfc40dfbd2da7afdbbb5f67fed0"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:184ae7ea5874f3b8fa51ab0f1519bdd088a0b78c32080ee272b1d137e2c8fd9c"}, + {file = "duckdb-0.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd33982ecc9bac727a032d6cedced9f19033cbad56647147408891eb51a6cb37"}, + {file = 
"duckdb-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f59bf0949899105dd5f8864cb48139bfb78454a8c017b8258ba2b5e90acf7afc"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:395f3b18948001e35dceb48a4423d574e38656606d033eef375408b539e7b076"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b8eb2b803be7ee1df70435c33b03a4598cdaf676cd67ad782b288dcff65d781"}, + {file = "duckdb-0.10.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:31b2ddd331801064326c8e3587a4db8a31d02aef11332c168f45b3bd92effb41"}, + {file = "duckdb-0.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c8b89e76a041424b8c2026c5dc1f74b53fbbc6c6f650d563259885ab2e7d093d"}, + {file = "duckdb-0.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:79084a82f16c0a54f6bfb7ded5600400c2daa90eb0d83337d81a56924eaee5d4"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:79799b3a270dcd9070f677ba510f1e66b112df3068425691bac97c5e278929c7"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8fc394bfe3434920cdbcfbdd0ac3ba40902faa1dbda088db0ba44003a45318a"}, + {file = "duckdb-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c116605551b4abf5786243a59bcef02bd69cc51837d0c57cafaa68cdc428aa0c"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3191170c3b0a43b0c12644800326f5afdea00d5a4621d59dbbd0c1059139e140"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fee69a50eb93c72dc77e7ab1fabe0c38d21a52c5da44a86aa217081e38f9f1bd"}, + {file = "duckdb-0.10.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5f449e87dacb16b0d145dbe65fa6fdb5a55b2b6911a46d74876e445dd395bac"}, + {file = "duckdb-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4487d0df221b17ea4177ad08131bc606b35f25cfadf890987833055b9d10cdf6"}, + {file = "duckdb-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:c099ae2ff8fe939fda62da81704f91e2f92ac45e48dc0e37c679c9d243d01e65"}, + {file = "duckdb-0.10.0.tar.gz", hash = "sha256:c02bcc128002aa79e3c9d89b9de25e062d1096a8793bc0d7932317b7977f6845"}, +] + [[package]] name = "email-validator" version = "1.3.1" @@ -8912,11 +8968,11 @@ bigquery = ["gcsfs", "google-cloud-bigquery", "grpcio", "pyarrow"] cli = ["cron-descriptor", "pipdeptree"] databricks = ["databricks-sql-connector"] dbt = ["dbt-athena-community", "dbt-bigquery", "dbt-core", "dbt-databricks", "dbt-duckdb", "dbt-redshift", "dbt-snowflake"] -duckdb = ["duckdb"] +duckdb = ["duckdb", "duckdb"] filesystem = ["botocore", "s3fs"] gcp = ["gcsfs", "google-cloud-bigquery", "grpcio"] gs = ["gcsfs"] -motherduck = ["duckdb", "pyarrow"] +motherduck = ["duckdb", "duckdb", "pyarrow"] mssql = ["pyodbc"] parquet = ["pyarrow"] postgres = ["psycopg2-binary", "psycopg2cffi"] @@ -8930,4 +8986,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "1e14eb51aad8b45d9db6bb685e108761cf7eaa6500af85de494bdf1ccd9c7dde" +content-hash = "83b1561c2e6cbe36fb048b09a16f846422c214bc91c973208c5224b04c0cb3b8" diff --git a/pyproject.toml b/pyproject.toml index 5d6ee9d645..847ae4c80b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,14 @@ psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_i grpcio = {version = ">=1.50.0", optional = true} google-cloud-bigquery = {version = ">=2.26.0", optional = true} 
pyarrow = {version = ">=12.0.0", optional = true} -duckdb = {version = ">=0.6.1,<0.10.0", optional = true} +numpy = [ + { version = ">=1.21", python = ">=3.8,<3.12" }, + { version = ">=1.26", python = ">=3.12" } +] +duckdb = [ + {version = ">=0.6.1,<0.10.0", python = ">=3.8,<3.12", optional = true}, + {version = ">=0.10.0,<0.11.0", python = ">=3.12", optional = true} +] dbt-core = {version = ">=1.2.0", optional = true} dbt-redshift = {version = ">=1.2.0", optional = true} dbt-bigquery = {version = ">=1.2.0", optional = true} From 7fa574d2fc7d3f278463a1d830abb8d855a27e58 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 24 Feb 2024 15:11:20 +0100 Subject: [PATCH 011/105] adds right hand pipe operator --- dlt/extract/resource.py | 12 +++++++++++- tests/extract/test_sources.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/dlt/extract/resource.py b/dlt/extract/resource.py index 3d03486436..e08cafe2c1 100644 --- a/dlt/extract/resource.py +++ b/dlt/extract/resource.py @@ -419,7 +419,9 @@ def __call__(self, *args: Any, **kwargs: Any) -> "DltResource": return r.bind(*args, **kwargs) def __or__(self, transform: Union["DltResource", AnyFun]) -> "DltResource": - """Allows to pipe data from across resources and transform functions with | operator""" + """Allows to pipe data from across resources and transform functions with | operator + This is the LEFT side OR so the self may be resource or transformer + """ # print(f"{resource.name} | {self.name} -> {resource.name}[{resource.is_transformer}]") if isinstance(transform, DltResource): transform.pipe_data_from(self) @@ -432,6 +434,14 @@ def __or__(self, transform: Union["DltResource", AnyFun]) -> "DltResource": else: return self.add_map(transform) + def __ror__(self, data: Union[Iterable[Any], Iterator[Any]]) -> "DltResource": + """Allows to pipe data from across resources and transform functions with | operator + This is the RIGHT side OR so the self may not be a resource and the LEFT must be an object + that does not implement | ie. a list + """ + self.pipe_data_from(self.from_data(data, name="iter_" + uniq_id(4))) + return self + def __iter__(self) -> Iterator[TDataItem]: """Opens iterator that yields the data items from the resources in the same order as in Pipeline class. 
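For illustration only (not part of the patch): a minimal sketch of how the right-hand pipe operator added above can be used. It mirrors the tests introduced in this commit; the transformer names are made up for the example.

```python
import dlt

# a plain list does not implement |, so Python falls back to DltResource.__ror__,
# which wraps the list in an ad-hoc "iter_*" resource and pipes it into the transformer
doubled = [1, 2, 3] | dlt.transformer(lambda i: i * 2, name="double")
print(list(doubled))  # [2, 4, 6]

# iterators are piped the same way
tripled = iter([1, 2, 3]) | dlt.transformer(lambda i: i * 3, name="triple")
print(list(tripled))  # [3, 6, 9]
```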
diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index 97b3a3c558..a94cf680fa 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -21,6 +21,7 @@ InvalidTransformerDataTypeGeneratorFunctionRequired, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, + ResourceNotATransformer, ResourcesNotFoundError, ) from dlt.extract.pipe import Pipe @@ -767,6 +768,22 @@ def test_add_transform_steps_pipe() -> None: assert list(r) == ["1", "2", "2", "3", "3", "3"] +def test_add_transformer_right_pipe() -> None: + # def tests right hand pipe + r = [1, 2, 3] | dlt.transformer(lambda i: i * 2, name="lambda") + # resource was created for a list + assert r._pipe.parent.name.startswith("iter") + assert list(r) == [2, 4, 6] + + # works for iterators + r = iter([1, 2, 3]) | dlt.transformer(lambda i: i * 3, name="lambda") + assert list(r) == [3, 6, 9] + + # must be a transformer + with pytest.raises(ResourceNotATransformer): + iter([1, 2, 3]) | dlt.resource(lambda i: i * 3, name="lambda") + + def test_limit_infinite_counter() -> None: r = dlt.resource(itertools.count(), name="infinity").add_limit(10) assert list(r) == list(range(10)) From 8c7942d6d45b5aa43e0d74038d054e00a5e93635 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 24 Feb 2024 15:11:44 +0100 Subject: [PATCH 012/105] fixes docker ci build --- Makefile | 7 ++++--- poetry.lock | 2 +- pyproject.toml | 4 ---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 8da28717c0..0680f463ec 100644 --- a/Makefile +++ b/Makefile @@ -89,9 +89,10 @@ publish-library: build-library poetry publish test-build-images: build-library - poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift - grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt + # TODO: enable when we can remove special duckdb setting for python 3.12 + # poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift + # grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" . - docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" . + # docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" . 
diff --git a/poetry.lock b/poetry.lock index c78fab989f..9c1c9b4226 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8986,4 +8986,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "83b1561c2e6cbe36fb048b09a16f846422c214bc91c973208c5224b04c0cb3b8" +content-hash = "3380a5a646776e0fc0d895b5271bb769872ac1cdb09a842af61ba1741d1c03b3" diff --git a/pyproject.toml b/pyproject.toml index 847ae4c80b..78305d97af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,10 +58,6 @@ psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_i grpcio = {version = ">=1.50.0", optional = true} google-cloud-bigquery = {version = ">=2.26.0", optional = true} pyarrow = {version = ">=12.0.0", optional = true} -numpy = [ - { version = ">=1.21", python = ">=3.8,<3.12" }, - { version = ">=1.26", python = ">=3.12" } -] duckdb = [ {version = ">=0.6.1,<0.10.0", python = ">=3.8,<3.12", optional = true}, {version = ">=0.10.0,<0.11.0", python = ">=3.12", optional = true} From f951fc093313d2a67585c19c87152e31984af493 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 24 Feb 2024 15:12:40 +0100 Subject: [PATCH 013/105] adds docs on async sources and resources --- docs/website/docs/general-usage/resource.md | 20 ++++++++++++++++++- docs/website/docs/general-usage/source.md | 8 +++++++- .../deploy-with-airflow-composer.md | 7 +++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 3b08a0b8ab..b7026c454e 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -8,7 +8,7 @@ keywords: [resource, api endpoint, dlt.resource] ## Declare a resource -A [resource](glossary.md#resource) is a ([optionally async](https://dlthub.com/docs/reference/performance#parallelism)) function that yields data. To create a +A [resource](glossary.md#resource) is an ([optionally async](../reference/performance.md#parallelism)) function that yields data. To create a resource, we add the `@dlt.resource` decorator to that function. Commonly used arguments: @@ -223,6 +223,24 @@ In the example above, `user_details` will receive data from default instance of pipeline.run(users(limit=100) | user_details) ``` +:::tip +Transformers are allowed not only to **yield** but also to **return** values and can decorate **async** functions and [**async generators**](../reference/performance.md#extract). Below we decorate an async function and request details on two pokemons. Http calls are made in parallel via httpx library. +```python +import dlt +import httpx + + +@dlt.transformer +async def pokemon(id): + async with httpx.AsyncClient() as client: + r = await client.get(f"https://pokeapi.co/api/v2/pokemon/{id}") + return r.json() + +# get bulbasaur and ivysaur (you need dlt 0.4.6 for pipe operator working with lists) +print(list([1,2] | pokemon())) +``` +::: + ### Declare a standalone resource A standalone resource is defined on a function that is top level in a module (not inner function) that accepts config and secrets values. Additionally if `standalone` flag is specified, the decorated function signature and docstring will be preserved. 
`dlt.resource` will just wrap the
diff --git a/docs/website/docs/general-usage/source.md b/docs/website/docs/general-usage/source.md
index 17c87f6b5f..1b3d1ce0cc 100644
--- a/docs/website/docs/general-usage/source.md
+++ b/docs/website/docs/general-usage/source.md
@@ -18,7 +18,7 @@ single API. The most common approach is to define it in a separate Python module

 ## Declare sources

-You declare source by decorating a function returning one or more resource with `dlt.source`. Our
+You declare a source by decorating an (optionally async) function that returns or yields one or more resources with `dlt.source`. Our
 [Create a pipeline](../walkthroughs/create-a-pipeline.md) how to guide teaches you how to do that.

 ### Create resources dynamically
@@ -46,6 +46,12 @@ def hubspot(api_key=dlt.secrets.value):
 You can [create, attach and configure schema](schema.md#attaching-schemas-to-sources) that will be
 used when loading the source.

+### Avoid long-lasting operations in the source function
+Do not extract data in the source function. Leave that task to your resources if possible. The source function is executed immediately when called (unlike resources, which delay execution like Python generators). You get several benefits (error handling, execution metrics, parallelization) when you extract data in `pipeline.run` or `pipeline.extract`.
+
+If this is impractical (for example, you want to reflect a database to create resources for its tables), make sure you do not call the source function too often. [See this note if you plan to deploy on Airflow](../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md#2-modify-dag-file)
+
+
 ## Customize sources

 ### Access and select resources to load
diff --git a/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md b/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md
index 2e8f0877f6..365f6747dc 100644
--- a/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md
+++ b/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md
@@ -9,6 +9,10 @@ keywords: [how to, deploy a pipeline, airflow, gcp]
 Before you can deploy a pipeline, you will need to [install dlt](../../reference/installation.md) and
 [create a pipeline](../create-a-pipeline.md).

+:::tip
+While this walkthrough deals specifically with Google Composer, it will generate DAGs and configuration files that you can use on any Airflow deployment. DAGs are generated using the **dlt Airflow helper**, which maps `dlt` resources into Airflow tasks and provides a clean working environment, a retry mechanism, metrics, and logging via Airflow loggers.
+:::
+
 ## 1. Add your `dlt` project directory to GitHub

 You will need a GitHub repository for your project. If you don't have one yet, you need to
@@ -178,6 +182,9 @@ load_data()
         retry_policy=Retrying(stop=stop_after_attempt(3), reraise=True),
     )
 ```
+:::tip
+When you run the `load_data` DAG above, Airflow will call the `source` function every 30 seconds (by default) to monitor the tasks. Make sure that your source function does not perform any long-lasting operations, e.g. reflecting the source database. In the case of [sql_database](../../dlt-ecosystem/verified-sources/sql_database.md), we added an option to delay database reflection until data is accessed by a resource.
+:::

 ### 3.
Import sources and move the relevant code from the pipeline script From 387a7c772b942a8602f45ab50362abf675fbe82b Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:38:08 +0100 Subject: [PATCH 014/105] normalizes default hints and preferred types in schema --- dlt/common/schema/schema.py | 45 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index b73e45d489..75187f41fe 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -24,7 +24,7 @@ SCHEMA_ENGINE_VERSION, LOADS_TABLE_NAME, VERSION_TABLE_NAME, - STATE_TABLE_NAME, + PIPELINE_STATE_TABLE_NAME, TPartialTableSchema, TSchemaContractEntities, TSchemaEvolutionMode, @@ -390,8 +390,11 @@ def resolve_contract_settings_for_table( # expand settings, empty settings will expand into default settings return Schema.expand_schema_contract_settings(settings) - def update_table(self, partial_table: TPartialTableSchema) -> TPartialTableSchema: - """Adds or merges `partial_table` into the schema. Identifiers are not normalized""" + def update_table(self, partial_table: TPartialTableSchema, normalize_identifiers: bool = True) -> TPartialTableSchema: + """Adds or merges `partial_table` into the schema. Identifiers are normalized by default""" + if normalize_identifiers: + partial_table = self.normalize_table_identifiers(partial_table) + table_name = partial_table["name"] parent_table_name = partial_table.get("parent") # check if parent table present @@ -417,14 +420,13 @@ def update_table(self, partial_table: TPartialTableSchema) -> TPartialTableSchem def update_schema(self, schema: "Schema") -> None: """Updates this schema from an incoming schema. Normalizes identifiers after updating normalizers.""" - # update all tables - for table in schema.tables.values(): - self.update_table(table) # pass normalizer config - self._configure_normalizers(schema._normalizers_config) - # update and compile settings self._settings = deepcopy(schema.settings) + self._configure_normalizers(schema._normalizers_config) self._compile_settings() + # update all tables + for table in schema.tables.values(): + self.update_table(table) def bump_version(self) -> Tuple[int, str]: """Computes schema hash in order to check if schema content was modified. In such case the schema ``stored_version`` and ``stored_version_hash`` are updated. @@ -457,7 +459,10 @@ def filter_row_with_hint(self, table_name: str, hint_type: TColumnHint, row: Str # dicts are ordered and we will return the rows with hints in the same order as they appear in the columns return rv_row - def merge_hints(self, new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]]) -> None: + def merge_hints(self, new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]], normalize_identifiers: bool = True) -> None: + """Merges existing default hints with `new_hint`. Normalizes names in column regexes if possible""" + if normalize_identifiers: + new_hints = self._normalize_default_hints(new_hints) # validate regexes validate_dict( TSchemaSettings, @@ -484,7 +489,6 @@ def normalize_table_identifiers(self, table: TTableSchema) -> TTableSchema: where the column that is defined later in the dictionary overrides earlier column. Note that resource name is not normalized. 
- """ # normalize all identifiers in table according to name normalizer of the schema table["name"] = self.naming.normalize_tables_path(table["name"]) @@ -652,6 +656,7 @@ def update_normalizers(self) -> None: normalizers["names"] = normalizers["names"] or self._normalizers_config["names"] normalizers["json"] = normalizers["json"] or self._normalizers_config["json"] self._configure_normalizers(normalizers) + self._compile_settings() def set_schema_contract(self, settings: TSchemaContract) -> None: if not settings: @@ -794,13 +799,21 @@ def _add_standard_tables(self) -> None: ) def _add_standard_hints(self) -> None: - default_hints = utils.standard_hints() + default_hints = utils.default_hints() if default_hints: - self._settings["default_hints"] = default_hints + self.merge_hints(default_hints) type_detections = utils.standard_type_detections() if type_detections: self._settings["detections"] = type_detections + def _normalize_default_hints(self, default_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]]) -> Mapping[TColumnHint, Sequence[TSimpleRegex]]: + """Normalizes the column names in default hints. In case of column names that are regexes, normalization is skipped""" + return {hint: [utils.normalize_simple_regex_column(self.naming, regex) for regex in regexes] for hint, regexes in default_hints.items()} + + def _normalize_preferred_types(self, preferred_types: Dict[TSimpleRegex, TDataType]) -> Dict[TSimpleRegex, TDataType]: + """Normalizes the column names in preferred types mapping. In case of column names that are regexes, normalization is skipped""" + return {utils.normalize_simple_regex_column(self.naming, regex): data_type for regex, data_type in preferred_types.items()} + def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: # import desired modules self._normalizers_config, naming_module, item_normalizer_class = import_normalizers( @@ -819,7 +832,13 @@ def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: self._dlt_tables_prefix = self.naming.normalize_table_identifier(DLT_NAME_PREFIX) self.version_table_name = self.naming.normalize_table_identifier(VERSION_TABLE_NAME) self.loads_table_name = self.naming.normalize_table_identifier(LOADS_TABLE_NAME) - self.state_table_name = self.naming.normalize_table_identifier(STATE_TABLE_NAME) + self.state_table_name = self.naming.normalize_table_identifier(PIPELINE_STATE_TABLE_NAME) + # normalize default hints + if default_hints := self._settings.get("default_hints"): + self._settings["default_hints"] = self._normalize_default_hints(default_hints) + # normalized preferred types + if preferred_types := self.settings.get("preferred_types"): + self._settings["preferred_types"] = self._normalize_preferred_types(preferred_types) # data item normalization function self.data_item_normalizer = item_normalizer_class(self) self.data_item_normalizer.extend_schema() From 88728e1bf8bcf0d4365a9362c39546c93784d13d Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:39:46 +0100 Subject: [PATCH 015/105] defines pipeline state table in utils, column normalization in simple regex --- dlt/common/schema/typing.py | 9 ++++--- dlt/common/schema/utils.py | 48 +++++++++++++++++++++++++++++++++++-- dlt/pipeline/state_sync.py | 20 +++------------- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index fcabeb409a..5823c9e956 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -31,7 +31,7 @@ # dlt tables 
VERSION_TABLE_NAME = "_dlt_version" LOADS_TABLE_NAME = "_dlt_loads" -STATE_TABLE_NAME = "_dlt_pipeline_state" +PIPELINE_STATE_TABLE_NAME = "_dlt_pipeline_state" DLT_NAME_PREFIX = "_dlt" TColumnProp = Literal[ @@ -46,6 +46,7 @@ "unique", "merge_key", "root_key", + "hard_delete", "dedup_sort", ] """Known properties and hints of the column""" @@ -58,11 +59,13 @@ "foreign_key", "sort", "unique", - "root_key", "merge_key", + "root_key", + "hard_delete", "dedup_sort", ] """Known hints of a column used to declare hint regexes.""" + TWriteDisposition = Literal["skip", "append", "replace", "merge"] TTableFormat = Literal["iceberg"] TTypeDetections = Literal[ @@ -73,7 +76,7 @@ """A string representing a column name or a list of""" TSortOrder = Literal["asc", "desc"] -COLUMN_PROPS: Set[TColumnProp] = set(get_args(TColumnProp)) +# COLUMN_PROPS: Set[TColumnProp] = set(get_args(TColumnProp)) COLUMN_HINTS: Set[TColumnHint] = set( [ "partition", diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 835fe4279e..8e9ab929d0 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -19,6 +19,7 @@ LOADS_TABLE_NAME, SIMPLE_REGEX_PREFIX, VERSION_TABLE_NAME, + PIPELINE_STATE_TABLE_NAME, TColumnName, TPartialTableSchema, TSchemaTables, @@ -214,6 +215,26 @@ def verify_schema_hash( return hash_ == stored_schema["version_hash"] +def normalize_simple_regex_column(naming: NamingConvention, regex: TSimpleRegex) -> TSimpleRegex: + """Assumes that regex applies to column name and normalizes it.""" + + def _normalize(r_: str) -> str: + is_exact = len(r_) >= 2 and r_[0] == "^" and r_[-1] == "$" + if is_exact: + r_ = r_[1:-1] + # if this a simple string then normalize it + if r_ == re.escape(r_): + r_ = naming.normalize_identifier(r_) + if is_exact: + r_ = "^" + r_ + "$" + return r_ + + if regex.startswith(SIMPLE_REGEX_PREFIX): + return SIMPLE_REGEX_PREFIX + _normalize(regex[3:]) + else: + return _normalize(regex) + + def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: # custom validator on type TSimpleRegex if t is TSimpleRegex: @@ -247,7 +268,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: # we know how to validate that type return True else: - # don't know how to validate t + # don't know how to validate this return False @@ -615,6 +636,7 @@ def group_tables_by_resource( def version_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) + # set to nullable so we can migrate existing tables table = new_table( VERSION_TABLE_NAME, columns=[ @@ -638,6 +660,7 @@ def version_table() -> TTableSchema: def load_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) + # set to nullable so we can migrate existing tables table = new_table( LOADS_TABLE_NAME, columns=[ @@ -657,6 +680,27 @@ def load_table() -> TTableSchema: return table +def pipeline_state_table() -> TTableSchema: + # NOTE: always add new columns at the end of the table so we have identical layout + # after an update of existing tables (always at the end) + # set to nullable so we can migrate existing tables + table = new_table( + PIPELINE_STATE_TABLE_NAME, + columns = [ + {"name": "version", "data_type": "bigint", "nullable": False}, + {"name": "engine_version", "data_type": "bigint", "nullable": False}, + {"name": "pipeline_name", "data_type": "text", "nullable": 
False}, + {"name": "state", "data_type": "text", "nullable": False}, + {"name": "created_at", "data_type": "timestamp", "nullable": False}, + {"name": "version_hash", "data_type": "text", "nullable": True}, + {"name": "_dlt_load_id", "data_type": "text", "nullable": False}, + ], + ) + table["write_disposition"] = "append" + table["description"] = "Created by DLT. Tracks pipeline state" + return table + + def new_table( table_name: str, parent_table_name: str = None, @@ -713,7 +757,7 @@ def new_column( return column -def standard_hints() -> Dict[TColumnHint, List[TSimpleRegex]]: +def default_hints() -> Dict[TColumnHint, List[TSimpleRegex]]: return None diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index fa3939969b..75d45e7fd1 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -9,7 +9,8 @@ from dlt.common import json from dlt.common.pipeline import TPipelineState from dlt.common.typing import DictStrAny -from dlt.common.schema.typing import STATE_TABLE_NAME, TTableSchemaColumns +from dlt.common.schema.typing import PIPELINE_STATE_TABLE_NAME +from dlt.common.schema.utils import pipeline_state_table from dlt.common.destination.reference import WithStateSync, Destination from dlt.common.utils import compressed_b64decode, compressed_b64encode @@ -21,21 +22,6 @@ # allows to upgrade state when restored with a new version of state logic/schema STATE_ENGINE_VERSION = 4 -# state table columns -STATE_TABLE_COLUMNS: TTableSchemaColumns = { - "version": {"name": "version", "data_type": "bigint", "nullable": False}, - "engine_version": {"name": "engine_version", "data_type": "bigint", "nullable": False}, - "pipeline_name": {"name": "pipeline_name", "data_type": "text", "nullable": False}, - "state": {"name": "state", "data_type": "text", "nullable": False}, - "created_at": {"name": "created_at", "data_type": "timestamp", "nullable": False}, - "version_hash": { - "name": "version_hash", - "data_type": "text", - "nullable": True, - }, # set to nullable so we can migrate existing tables -} - - def json_encode_state(state: TPipelineState) -> str: return json.typed_dumps(state) @@ -96,7 +82,7 @@ def state_resource(state: TPipelineState) -> DltResource: "version_hash": state["_version_hash"], } return dlt.resource( - [state_doc], name=STATE_TABLE_NAME, write_disposition="append", columns=STATE_TABLE_COLUMNS + [state_doc], name=PIPELINE_STATE_TABLE_NAME, write_disposition="append", columns=pipeline_state_table()["columns"] ) From 1a534251e170392b5bf01261f89cd3f465743869 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:45:45 +0100 Subject: [PATCH 016/105] normalizes all identifiers used by relational normalizer, fixes other modules --- dlt/common/normalizers/json/relational.py | 139 +++++++++++------- dlt/extract/source.py | 20 ++- dlt/normalize/items_normalizers.py | 12 +- dlt/normalize/normalize.py | 10 +- .../normalizers/test_json_relational.py | 3 +- tests/common/schema/test_inference.py | 22 +-- tests/common/schema/test_schema.py | 21 +-- 7 files changed, 135 insertions(+), 92 deletions(-) diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index e33bf2ab35..c55ce888ba 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -1,5 +1,4 @@ from typing import Dict, List, Mapping, Optional, Sequence, Tuple, cast, TypedDict, Any -from dlt.common.data_types.typing import TDataType from dlt.common.normalizers.exceptions import InvalidJsonNormalizer 
from dlt.common.normalizers.typing import TJSONNormalizer from dlt.common.normalizers.utils import generate_dlt_id, DLT_ID_LENGTH_BYTES @@ -16,28 +15,10 @@ ) from dlt.common.validation import validate_dict -EMPTY_KEY_IDENTIFIER = "_empty" # replace empty keys with this - - -class TDataItemRow(TypedDict, total=False): - _dlt_id: str # unique id of current row - - -class TDataItemRowRoot(TDataItemRow, total=False): - _dlt_load_id: str # load id to identify records loaded together that ie. need to be processed - # _dlt_meta: TEventDLTMeta # stores metadata, should never be sent to the normalizer - - -class TDataItemRowChild(TDataItemRow, total=False): - _dlt_root_id: str # unique id of top level parent - _dlt_parent_id: str # unique id of parent row - _dlt_list_idx: int # position in the list of rows - value: Any # for lists of simple types - class RelationalNormalizerConfigPropagation(TypedDict, total=False): - root: Optional[Mapping[str, TColumnName]] - tables: Optional[Mapping[str, Mapping[str, TColumnName]]] + root: Optional[Mapping[TColumnName, TColumnName]] + tables: Optional[Mapping[str, Mapping[TColumnName, TColumnName]]] class RelationalNormalizerConfig(TypedDict, total=False): @@ -47,6 +28,25 @@ class RelationalNormalizerConfig(TypedDict, total=False): class DataItemNormalizer(DataItemNormalizerBase[RelationalNormalizerConfig]): + + # known normalizer props + C_DLT_ID = "_dlt_id" + """unique id of current row""" + C_DLT_LOAD_ID = "_dlt_load_id" + """load id to identify records loaded together that ie. need to be processed""" + C_DLT_ROOT_ID = "_dlt_root_id" + """unique id of top level parent""" + C_DLT_PARENT_ID = "_dlt_parent_id" + """unique id of parent row""" + C_DLT_LIST_IDX = "_dlt_list_idx" + """position in the list of rows""" + C_VALUE = "value" + """for lists of simple types""" + + # other constants + EMPTY_KEY_IDENTIFIER = "_empty" # replace empty keys with this + + normalizer_config: RelationalNormalizerConfig propagation_config: RelationalNormalizerConfigPropagation max_nesting: int @@ -56,9 +56,20 @@ def __init__(self, schema: Schema) -> None: """This item normalizer works with nested dictionaries. It flattens dictionaries and descends into lists. 
It yields row dictionaries at each nesting level.""" self.schema = schema + self.naming = schema.naming self._reset() def _reset(self) -> None: + # normalize known normalizer column identifiers + self.c_dlt_id = self.naming.normalize_identifier(self.C_DLT_ID) + self.c_dlt_load_id = self.naming.normalize_identifier(self.C_DLT_LOAD_ID) + self.c_dlt_root_id = self.naming.normalize_identifier(self.C_DLT_ROOT_ID) + self.c_dlt_parent_id = self.naming.normalize_identifier(self.C_DLT_PARENT_ID) + self.c_dlt_list_idx = self.naming.normalize_identifier(self.C_DLT_LIST_IDX) + self.c_value = self.naming.normalize_identifier(self.C_VALUE) + + # normalize config + self.normalizer_config = self.schema._normalizers_config["json"].get("config") or {} # type: ignore self.propagation_config = self.normalizer_config.get("propagation", None) self.max_nesting = self.normalizer_config.get("max_nesting", 1000) @@ -89,8 +100,8 @@ def _is_complex_type(self, table_name: str, field_name: str, _r_lvl: int) -> boo return data_type == "complex" def _flatten( - self, table: str, dict_row: TDataItemRow, _r_lvl: int - ) -> Tuple[TDataItemRow, Dict[Tuple[str, ...], Sequence[Any]]]: + self, table: str, dict_row: DictStrAny, _r_lvl: int + ) -> Tuple[DictStrAny, Dict[Tuple[str, ...], Sequence[Any]]]: out_rec_row: DictStrAny = {} out_rec_list: Dict[Tuple[str, ...], Sequence[Any]] = {} schema_naming = self.schema.naming @@ -101,7 +112,7 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] = ()) - norm_k = schema_naming.normalize_identifier(k) else: # for empty keys in the data use _ - norm_k = EMPTY_KEY_IDENTIFIER + norm_k = self.EMPTY_KEY_IDENTIFIER # if norm_k != k: # print(f"{k} -> {norm_k}") child_name = ( @@ -125,7 +136,7 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] 
= ()) - out_rec_row[child_name] = v norm_row_dicts(dict_row, _r_lvl) - return cast(TDataItemRow, out_rec_row), out_rec_list + return cast(DictStrAny, out_rec_row), out_rec_list @staticmethod def _get_child_row_hash(parent_row_id: str, child_table: str, list_idx: int) -> str: @@ -134,7 +145,7 @@ def _get_child_row_hash(parent_row_id: str, child_table: str, list_idx: int) -> return digest128(f"{parent_row_id}_{child_table}_{list_idx}", DLT_ID_LENGTH_BYTES) @staticmethod - def _link_row(row: TDataItemRowChild, parent_row_id: str, list_idx: int) -> TDataItemRowChild: + def _link_row(row: DictStrAny, parent_row_id: str, list_idx: int) -> DictStrAny: assert parent_row_id row["_dlt_parent_id"] = parent_row_id row["_dlt_list_idx"] = list_idx @@ -142,11 +153,11 @@ def _link_row(row: TDataItemRowChild, parent_row_id: str, list_idx: int) -> TDat return row @staticmethod - def _extend_row(extend: DictStrAny, row: TDataItemRow) -> None: + def _extend_row(extend: DictStrAny, row: DictStrAny) -> None: row.update(extend) # type: ignore def _add_row_id( - self, table: str, row: TDataItemRow, parent_row_id: str, pos: int, _r_lvl: int + self, table: str, row: DictStrAny, parent_row_id: str, pos: int, _r_lvl: int ) -> str: # row_id is always random, no matter if primary_key is present or not row_id = generate_dlt_id() @@ -156,11 +167,11 @@ def _add_row_id( # child table row deterministic hash row_id = DataItemNormalizer._get_child_row_hash(parent_row_id, table, pos) # link to parent table - DataItemNormalizer._link_row(cast(TDataItemRowChild, row), parent_row_id, pos) - row["_dlt_id"] = row_id + DataItemNormalizer._link_row(cast(DictStrAny, row), parent_row_id, pos) + row[self.c_dlt_id] = row_id return row_id - def _get_propagated_values(self, table: str, row: TDataItemRow, _r_lvl: int) -> StrAny: + def _get_propagated_values(self, table: str, row: DictStrAny, _r_lvl: int) -> StrAny: extend: DictStrAny = {} config = self.propagation_config @@ -188,7 +199,7 @@ def _normalize_list( parent_row_id: Optional[str] = None, _r_lvl: int = 0, ) -> TNormalizedRowIterator: - v: TDataItemRowChild = None + v: DictStrAny = None table = self.schema.naming.shorten_fragments(*parent_path, *ident_path) for idx, v in enumerate(seq): @@ -206,14 +217,14 @@ def _normalize_list( # list of simple types child_row_hash = DataItemNormalizer._get_child_row_hash(parent_row_id, table, idx) wrap_v = wrap_in_dict(v) - wrap_v["_dlt_id"] = child_row_hash + wrap_v[self.c_dlt_id] = child_row_hash e = DataItemNormalizer._link_row(wrap_v, parent_row_id, idx) DataItemNormalizer._extend_row(extend, e) yield (table, self.schema.naming.shorten_fragments(*parent_path)), e def _normalize_row( self, - dict_row: TDataItemRow, + dict_row: DictStrAny, extend: DictStrAny, ident_path: Tuple[str, ...], parent_path: Tuple[str, ...] 
= (), @@ -229,7 +240,7 @@ def _normalize_row( # always extend row DataItemNormalizer._extend_row(extend, flattened_row) # infer record hash or leave existing primary key if present - row_id = flattened_row.get("_dlt_id", None) + row_id = flattened_row.get(self.c_dlt_id, None) if not row_id: row_id = self._add_row_id(table, flattened_row, parent_row_id, pos, _r_lvl) @@ -256,34 +267,38 @@ def extend_schema(self) -> None: # quick check to see if hints are applied default_hints = self.schema.settings.get("default_hints") or {} - if "not_null" in default_hints and "^_dlt_id$" in default_hints["not_null"]: + if "not_null" in default_hints and self.c_dlt_id in default_hints["not_null"]: return # add hints self.schema.merge_hints( { "not_null": [ - TSimpleRegex("_dlt_id"), - TSimpleRegex("_dlt_root_id"), - TSimpleRegex("_dlt_parent_id"), - TSimpleRegex("_dlt_list_idx"), - TSimpleRegex("_dlt_load_id"), + TSimpleRegex(self.c_dlt_id), + TSimpleRegex(self.c_dlt_root_id), + TSimpleRegex(self.c_dlt_parent_id), + TSimpleRegex(self.c_dlt_list_idx), + TSimpleRegex(self.c_dlt_load_id), ], - "foreign_key": [TSimpleRegex("_dlt_parent_id")], - "root_key": [TSimpleRegex("_dlt_root_id")], - "unique": [TSimpleRegex("_dlt_id")], - } + "foreign_key": [TSimpleRegex(self.c_dlt_parent_id)], + "root_key": [TSimpleRegex(self.c_dlt_root_id)], + "unique": [TSimpleRegex(self.c_dlt_id)], + }, + normalize_identifiers=False # already normalized ) for table_name in self.schema.tables.keys(): self.extend_table(table_name) def extend_table(self, table_name: str) -> None: - # if the table has a merge w_d, add propagation info to normalizer + """If the table has a merge write disposition, add propagation info to normalizer + + Table name should be normalized. + """ table = self.schema.tables.get(table_name) if not table.get("parent") and table.get("write_disposition") == "merge": DataItemNormalizer.update_normalizer_config( self.schema, - {"propagation": {"tables": {table_name: {"_dlt_id": TColumnName("_dlt_root_id")}}}}, + {"propagation": {"tables": {table_name: {self.c_dlt_id: TColumnName(self.c_dlt_root_id)}}}}, ) def normalize_data_item( @@ -293,11 +308,11 @@ def normalize_data_item( if not isinstance(item, dict): item = wrap_in_dict(item) # we will extend event with all the fields necessary to load it as root row - row = cast(TDataItemRowRoot, item) + row = cast(DictStrAny, item) # identify load id if loaded data must be processed after loading incrementally - row["_dlt_load_id"] = load_id + row[self.c_dlt_load_id] = load_id yield from self._normalize_row( - cast(TDataItemRowChild, row), + row, {}, (self.schema.naming.normalize_table_identifier(table_name),), ) @@ -312,12 +327,12 @@ def ensure_this_normalizer(cls, norm_config: TJSONNormalizer) -> None: @classmethod def update_normalizer_config(cls, schema: Schema, config: RelationalNormalizerConfig) -> None: cls._validate_normalizer_config(schema, config) - norm_config = schema._normalizers_config["json"] - cls.ensure_this_normalizer(norm_config) - if "config" in norm_config: - update_dict_nested(norm_config["config"], config) # type: ignore + existing_config = schema._normalizers_config["json"] + cls.ensure_this_normalizer(existing_config) + if "config" in existing_config: + update_dict_nested(existing_config["config"], config) # type: ignore else: - norm_config["config"] = config + existing_config["config"] = config @classmethod def get_normalizer_config(cls, schema: Schema) -> RelationalNormalizerConfig: @@ -327,6 +342,20 @@ def get_normalizer_config(cls, schema: Schema) 
-> RelationalNormalizerConfig: @staticmethod def _validate_normalizer_config(schema: Schema, config: RelationalNormalizerConfig) -> None: + """Normalizes all known column identifiers according to the schema and then validates the configuration""" + + def _normalize_prop(mapping: Mapping[TColumnName, TColumnName]) -> Mapping[TColumnName, TColumnName]: + return {schema.naming.normalize_identifier(from_col): schema.naming.normalize_identifier(to_col) for from_col, to_col in mapping.items()} + + # normalize the identifiers first + propagation_config = config.get("propagation") + if propagation_config: + if "root" in propagation_config: + propagation_config["root"] = _normalize_prop(propagation_config["root"]) + if "tables" in propagation_config: + for table_name in propagation_config["tables"]: + propagation_config["tables"][table_name] = _normalize_prop(propagation_config["tables"][table_name]) + validate_dict( RelationalNormalizerConfig, config, diff --git a/dlt/extract/source.py b/dlt/extract/source.py index bc33394d4d..f3f4c580df 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -243,26 +243,32 @@ def exhausted(self) -> bool: @property def root_key(self) -> bool: """Enables merging on all resources by propagating root foreign key to child tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge""" + # this also check the normalizer type config = RelationalNormalizer.get_normalizer_config(self._schema).get("propagation") + data_normalizer = self._schema.data_item_normalizer + assert isinstance(data_normalizer, RelationalNormalizer) return ( config is not None and "root" in config - and "_dlt_id" in config["root"] - and config["root"]["_dlt_id"] == "_dlt_root_id" + and data_normalizer.c_dlt_id in config["root"] + and config["root"][data_normalizer.c_dlt_id] == data_normalizer.c_dlt_root_id ) @root_key.setter def root_key(self, value: bool) -> None: + # this also check the normalizer type + config = RelationalNormalizer.get_normalizer_config(self._schema) + data_normalizer = self._schema.data_item_normalizer + assert isinstance(data_normalizer, RelationalNormalizer) + if value is True: RelationalNormalizer.update_normalizer_config( - self._schema, {"propagation": {"root": {"_dlt_id": TColumnName("_dlt_root_id")}}} + self._schema, {"propagation": {"root": {data_normalizer.c_dlt_id: TColumnName(data_normalizer.c_dlt_root_id)}}} ) else: if self.root_key: - propagation_config = RelationalNormalizer.get_normalizer_config(self._schema)[ - "propagation" - ] - propagation_config["root"].pop("_dlt_id") # type: ignore + propagation_config = config["propagation"] + propagation_config["root"].pop(data_normalizer.c_dlt_id) # type: ignore @property def resources(self) -> DltResourceDict: diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index 56d38a5a64..e6c68847d3 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -4,6 +4,7 @@ from dlt.common import json, logger from dlt.common.data_writers import DataWriterMetrics from dlt.common.json import custom_pua_decode, may_have_pua +from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer from dlt.common.runtime import signals from dlt.common.schema.typing import TSchemaEvolutionMode, TTableSchemaColumns, TSchemaContractDict from dlt.common.schema.utils import has_table_seen_data @@ -223,8 +224,10 @@ def _write_with_dlt_columns( schema = self.schema load_id = self.load_id 
schema_update: TSchemaUpdate = {} + data_normalizer = schema.data_item_normalizer - if add_load_id: + if add_load_id and isinstance(data_normalizer, RelationalNormalizer): + # NOTE: update table will normalize names table_update = schema.update_table( { "name": root_table_name, @@ -243,12 +246,13 @@ def _write_with_dlt_columns( new_columns.append( ( -1, - pa.field("_dlt_load_id", load_id_type, nullable=False), + pa.field(data_normalizer.c_dlt_load_id, load_id_type, nullable=False), lambda batch: pa.array([load_id] * batch.num_rows, type=load_id_type), ) ) - if add_dlt_id: + if add_dlt_id and isinstance(data_normalizer, RelationalNormalizer): + # NOTE: update table will normalize names table_update = schema.update_table( { "name": root_table_name, @@ -262,7 +266,7 @@ def _write_with_dlt_columns( new_columns.append( ( -1, - pa.field("_dlt_id", pyarrow.pyarrow.string(), nullable=False), + pa.field(data_normalizer.c_dlt_id, pyarrow.pyarrow.string(), nullable=False), lambda batch: pa.array(generate_dlt_ids(batch.num_rows)), ) ) diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index d360a1c7c4..4c4cc3aae5 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -177,15 +177,15 @@ def _get_items_normalizer(file_format: TLoaderFileFormat) -> ItemsNormalizer: logger.info(f"Processed all items in {len(extracted_items_files)} files") return TWorkerRV(schema_updates, writer_metrics) - def update_table(self, schema: Schema, schema_updates: List[TSchemaUpdate]) -> None: + def update_schema(self, schema: Schema, schema_updates: List[TSchemaUpdate]) -> None: for schema_update in schema_updates: for table_name, table_updates in schema_update.items(): logger.info( f"Updating schema for table {table_name} with {len(table_updates)} deltas" ) for partial_table in table_updates: - # merge columns - schema.update_table(partial_table) + # merge columns where we expect identifiers to be normalized + schema.update_table(partial_table, normalize_identifiers=False) @staticmethod def group_worker_files(files: Sequence[str], no_groups: int) -> List[Sequence[str]]: @@ -238,7 +238,7 @@ def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TW ) # Exception in task (if any) is raised here try: # gather schema from all manifests, validate consistency and combine - self.update_table(schema, result[0]) + self.update_schema(schema, result[0]) summary.schema_updates.extend(result.schema_updates) summary.file_metrics.extend(result.file_metrics) # update metrics @@ -277,7 +277,7 @@ def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWor load_id, files, ) - self.update_table(schema, result.schema_updates) + self.update_schema(schema, result.schema_updates) self.collector.update("Files", len(result.file_metrics)) self.collector.update( "Items", sum(result.file_metrics, EMPTY_DATA_WRITER_METRICS).items_count diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index 502ce619dd..cee9988ce2 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -4,14 +4,13 @@ from dlt.common.normalizers.naming import NamingConvention from dlt.common.schema.typing import TSimpleRegex from dlt.common.utils import digest128, uniq_id -from dlt.common.schema import Schema, TTableSchema +from dlt.common.schema import Schema from dlt.common.schema.utils import new_table from dlt.common.normalizers.json.relational import ( 
RelationalNormalizerConfigPropagation, DataItemNormalizer as RelationalNormalizer, DLT_ID_LENGTH_BYTES, - TDataItemRow, ) # _flatten, _get_child_row_hash, _normalize_row, normalize_data_item, diff --git a/tests/common/schema/test_inference.py b/tests/common/schema/test_inference.py index da5c809827..dce9eba149 100644 --- a/tests/common/schema/test_inference.py +++ b/tests/common/schema/test_inference.py @@ -1,3 +1,4 @@ +import os import pytest from copy import deepcopy from typing import Any, List @@ -14,6 +15,7 @@ TablePropertiesConflictException, ) from tests.common.utils import load_json_case +from tests.utils import preserve_environ @pytest.fixture @@ -204,11 +206,10 @@ def test_shorten_variant_column(schema: Schema) -> None: } _, new_table = schema.coerce_row("event_user", None, row_1) # schema assumes that identifiers are already normalized so confidence even if it is longer than 9 chars - schema.update_table(new_table) + schema.update_table(new_table, normalize_identifiers=False) assert "confidence" in schema.tables["event_user"]["columns"] # confidence_123456 # now variant is created and this will be normalized - # TODO: we should move the handling of variants to normalizer new_row_2, new_table = schema.coerce_row("event_user", None, {"confidence": False}) tag = schema.naming._compute_tag( "confidence__v_bool", collision_prob=schema.naming._DEFAULT_COLLISION_PROB @@ -219,6 +220,9 @@ def test_shorten_variant_column(schema: Schema) -> None: def test_coerce_complex_variant(schema: Schema) -> None: + # for this test use case sensitive naming convention + os.environ["SCHEMA__NAMING"] = "direct" + schema.update_normalizers() # create two columns to which complex type cannot be coerced row = {"floatX": 78172.128, "confidenceX": 1.2, "strX": "STR"} new_row, new_table = schema.coerce_row("event_user", None, row) @@ -252,12 +256,12 @@ def test_coerce_complex_variant(schema: Schema) -> None: c_new_columns_v = list(c_new_table_v["columns"].values()) # two new variant columns added assert len(c_new_columns_v) == 2 - assert c_new_columns_v[0]["name"] == "floatX__v_complex" - assert c_new_columns_v[1]["name"] == "confidenceX__v_complex" + assert c_new_columns_v[0]["name"] == "floatX▶v_complex" + assert c_new_columns_v[1]["name"] == "confidenceX▶v_complex" assert c_new_columns_v[0]["variant"] is True assert c_new_columns_v[1]["variant"] is True - assert c_new_row_v["floatX__v_complex"] == v_list - assert c_new_row_v["confidenceX__v_complex"] == v_dict + assert c_new_row_v["floatX▶v_complex"] == v_list + assert c_new_row_v["confidenceX▶v_complex"] == v_dict assert c_new_row_v["strX"] == json.dumps(v_dict) schema.update_table(c_new_table_v) @@ -265,8 +269,8 @@ def test_coerce_complex_variant(schema: Schema) -> None: c_row_v = {"floatX": v_list, "confidenceX": v_dict, "strX": v_dict} c_new_row_v, c_new_table_v = schema.coerce_row("event_user", None, c_row_v) assert c_new_table_v is None - assert c_new_row_v["floatX__v_complex"] == v_list - assert c_new_row_v["confidenceX__v_complex"] == v_dict + assert c_new_row_v["floatX▶v_complex"] == v_list + assert c_new_row_v["confidenceX▶v_complex"] == v_dict assert c_new_row_v["strX"] == json.dumps(v_dict) @@ -539,7 +543,7 @@ def test_infer_on_incomplete_column(schema: Schema) -> None: incomplete_col["primary_key"] = True incomplete_col["x-special"] = "spec" # type: ignore[typeddict-unknown-key] table = utils.new_table("table", columns=[incomplete_col]) - schema.update_table(table) + schema.update_table(table, normalize_identifiers=False) # make sure that 
column is still incomplete and has no default hints assert schema.get_table("table")["columns"]["I"] == { "name": "I", diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py index ba817b946f..6b79517a49 100644 --- a/tests/common/schema/test_schema.py +++ b/tests/common/schema/test_schema.py @@ -293,7 +293,7 @@ def test_save_load_incomplete_column( incomplete_col["primary_key"] = True incomplete_col["x-special"] = "spec" # type: ignore[typeddict-unknown-key] table = utils.new_table("table", columns=[incomplete_col]) - schema.update_table(table) + schema.update_table(table, normalize_identifiers=False) schema_storage_no_import.save_schema(schema) schema_copy = schema_storage_no_import.load_schema("event") assert schema_copy.get_table("table")["columns"]["I"] == { @@ -736,20 +736,21 @@ def test_group_tables_by_resource(schema: Schema) -> None: schema.update_table(utils.new_table("a_events", columns=[])) schema.update_table(utils.new_table("b_events", columns=[])) schema.update_table(utils.new_table("c_products", columns=[], resource="products")) - schema.update_table(utils.new_table("a_events__1", columns=[], parent_table_name="a_events")) + schema.update_table(utils.new_table("a_events___1", columns=[], parent_table_name="a_events")) schema.update_table( - utils.new_table("a_events__1__2", columns=[], parent_table_name="a_events__1") + utils.new_table("a_events___1___2", columns=[], parent_table_name="a_events___1") ) - schema.update_table(utils.new_table("b_events__1", columns=[], parent_table_name="b_events")) + schema.update_table(utils.new_table("b_events___1", columns=[], parent_table_name="b_events")) + # print(schema.to_pretty_yaml()) # All resources without filter expected_tables = { "a_events": [ schema.tables["a_events"], - schema.tables["a_events__1"], - schema.tables["a_events__1__2"], + schema.tables["a_events___1"], + schema.tables["a_events___1___2"], ], - "b_events": [schema.tables["b_events"], schema.tables["b_events__1"]], + "b_events": [schema.tables["b_events"], schema.tables["b_events___1"]], "products": [schema.tables["c_products"]], "_dlt_version": [schema.tables["_dlt_version"]], "_dlt_loads": [schema.tables["_dlt_loads"]], @@ -764,10 +765,10 @@ def test_group_tables_by_resource(schema: Schema) -> None: assert result == { "a_events": [ schema.tables["a_events"], - schema.tables["a_events__1"], - schema.tables["a_events__1__2"], + schema.tables["a_events___1"], + schema.tables["a_events___1___2"], ], - "b_events": [schema.tables["b_events"], schema.tables["b_events__1"]], + "b_events": [schema.tables["b_events"], schema.tables["b_events___1"]], } # With resources that has many top level tables From 8835023c3cf9c6e7e5553c49ec645802c3dd203d Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:46:32 +0100 Subject: [PATCH 017/105] fixes sql job client to use normalized identifiers in queries --- dlt/destinations/impl/redshift/redshift.py | 4 +- dlt/destinations/impl/snowflake/snowflake.py | 8 +- dlt/destinations/job_client_impl.py | 77 ++++++++++---------- dlt/destinations/sql_jobs.py | 2 +- 4 files changed, 42 insertions(+), 49 deletions(-) diff --git a/dlt/destinations/impl/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py index eaa1968133..3426d96690 100644 --- a/dlt/destinations/impl/redshift/redshift.py +++ b/dlt/destinations/impl/redshift/redshift.py @@ -14,7 +14,7 @@ # from psycopg2.sql import SQL, Composed -from typing import ClassVar, Dict, List, Optional, Sequence, Any +from typing import ClassVar, 
Dict, List, Optional, Sequence, Any, Tuple from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( @@ -24,7 +24,7 @@ ) from dlt.common.data_types import TDataType from dlt.common.schema import TColumnSchema, TColumnHint, Schema -from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat +from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat, TTableSchemaColumns from dlt.common.configuration.specs import AwsCredentialsWithoutDefaults from dlt.destinations.insert_job_client import InsertValuesJobClient diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index 7fafbf83b7..6f454f070a 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -262,10 +262,4 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: table_name = table_name.upper() # All snowflake tables are uppercased in information schema - exists, table = super().get_storage_table(table_name) - if not exists: - return exists, table - # Snowflake converts all unquoted columns to UPPER CASE - # Convert back to lower case to enable comparison with dlt schema - table = {col_name.lower(): dict(col, name=col_name.lower()) for col_name, col in table.items()} # type: ignore - return exists, table + return super().get_storage_table(table_name) diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 7896fa2cc4..933ed758e3 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -24,15 +24,14 @@ import re from dlt.common import json, pendulum, logger -from dlt.common.data_types import TDataType from dlt.common.schema.typing import ( COLUMN_HINTS, TColumnType, TColumnSchemaBase, TTableSchema, - TWriteDisposition, TTableFormat, ) +from dlt.common.schema.utils import pipeline_state_table from dlt.common.storages import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns, TSchemaTables from dlt.common.destination.reference import ( @@ -41,7 +40,6 @@ WithStateSync, DestinationClientConfiguration, DestinationClientDwhConfiguration, - DestinationClientDwhWithStagingConfiguration, NewLoadJob, WithStagingDataset, TLoadJobState, @@ -53,11 +51,9 @@ from dlt.destinations.exceptions import ( DatabaseUndefinedRelation, DestinationSchemaTampered, - DestinationSchemaWillNotUpdate, ) from dlt.destinations.job_impl import EmptyLoadJobWithoutFollowup, NewReferenceJob from dlt.destinations.sql_jobs import SqlMergeJob, SqlStagingCopyJob -from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME from dlt.destinations.typing import TNativeConn from dlt.destinations.sql_client import SqlClientBase @@ -135,22 +131,6 @@ def state(self) -> TLoadJobState: class SqlJobClientBase(JobClientBase, WithStateSync): - _VERSION_TABLE_SCHEMA_COLUMNS: ClassVar[Tuple[str, ...]] = ( - "version_hash", - "schema_name", - "version", - "engine_version", - "inserted_at", - "schema", - ) - _STATE_TABLE_COLUMNS: ClassVar[Tuple[str, ...]] = ( - "version", - "engine_version", - "pipeline_name", - "state", - "created_at", - "_dlt_load_id", - ) def __init__( self, @@ -159,12 +139,16 @@ def __init__( sql_client: SqlClientBase[TNativeConn], ) -> None: self.version_table_schema_columns = ", ".join( - sql_client.escape_column_name(col) for col in 
self._VERSION_TABLE_SCHEMA_COLUMNS + sql_client.escape_column_name(col) for col in schema.get_table_columns(schema.version_table_name) ) + self.loads_table_schema_columns = ", ".join( + sql_client.escape_column_name(col) for col in schema.get_table_columns(schema.loads_table_name) + ) + # get definition of state table (may not be present in the schema) + state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) self.state_table_columns = ", ".join( - sql_client.escape_column_name(col) for col in self._STATE_TABLE_COLUMNS + sql_client.escape_column_name(col) for col in state_table["columns"] ) - super().__init__(schema, config) self.sql_client = sql_client assert isinstance(config, DestinationClientDwhConfiguration) @@ -281,7 +265,7 @@ def complete_load(self, load_id: str) -> None: name = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) now_ts = pendulum.now() self.sql_client.execute_sql( - f"INSERT INTO {name}(load_id, schema_name, status, inserted_at, schema_version_hash)" + f"INSERT INTO {name}({self.loads_table_schema_columns})" " VALUES(%s, %s, %s, %s, %s);", load_id, self.schema.name, @@ -328,7 +312,7 @@ def _null_to_bool(v: str) -> bool: query += "table_catalog = %s AND " query += "table_schema = %s AND table_name = %s ORDER BY ordinal_position;" rows = self.sql_client.execute_sql(query, *db_params) - + print(rows) # if no rows we assume that table does not exist schema_table: TTableSchemaColumns = {} if len(rows) == 0: @@ -336,16 +320,17 @@ def _null_to_bool(v: str) -> bool: return False, schema_table # TODO: pull more data to infer indexes, PK and uniques attributes/constraints for c in rows: + col_name = self.schema.naming.normalize_identifier(c[0]) numeric_precision = ( c[3] if self.capabilities.schema_supports_numeric_precision else None ) numeric_scale = c[4] if self.capabilities.schema_supports_numeric_precision else None schema_c: TColumnSchemaBase = { - "name": c[0], + "name": col_name, "nullable": _null_to_bool(c[2]), **self._from_db_type(c[1], numeric_precision, numeric_scale), } - schema_table[c[0]] = schema_c # type: ignore + schema_table[col_name] = schema_c # type: ignore return True, schema_table @abstractmethod @@ -356,19 +341,28 @@ def _from_db_type( def get_stored_schema(self) -> StorageSchemaInfo: name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) + c_schema_name, c_inserted_at = self._norm_and_escape_columns("schema_name", "inserted_at") + # c_schema_name = self.schema.naming.normalize_identifier("schema_name") + # c_inserted_at = self.schema.naming.normalize_identifier("inserted_at") query = ( - f"SELECT {self.version_table_schema_columns} FROM {name} WHERE schema_name = %s ORDER" - " BY inserted_at DESC;" + f"SELECT {self.version_table_schema_columns} FROM {name} WHERE {c_schema_name} = %s ORDER" + f" BY {c_inserted_at} DESC;" ) return self._row_to_schema_info(query, self.schema.name) def get_stored_state(self, pipeline_name: str) -> StateInfo: state_table = self.sql_client.make_qualified_table_name(self.schema.state_table_name) loads_table = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) + c_load_id, c_dlt_load_id, c_pipeline_name, c_status, c_created_at = self._norm_and_escape_columns("load_id", "_dlt_load_id", "pipeline_name", "status", "created_at") + # c_load_id = self.schema.naming.normalize_identifier("load_id") + # c_dlt_load_id = self.schema.naming.normalize_identifier("_dlt_load_id") + # c_pipeline_name = 
self.schema.naming.normalize_identifier("pipeline_name") + # c_status = self.schema.naming.normalize_identifier("status") + # c_created_at = self.schema.naming.normalize_identifier("created_at") query = ( f"SELECT {self.state_table_columns} FROM {state_table} AS s JOIN {loads_table} AS l ON" - " l.load_id = s._dlt_load_id WHERE pipeline_name = %s AND l.status = 0 ORDER BY" - " created_at DESC" + f" l.{c_load_id} = s.{c_dlt_load_id} WHERE {c_pipeline_name} = %s AND l.{c_status} = 0 ORDER BY" + f" {c_created_at} DESC" ) with self.sql_client.execute_query(query, pipeline_name) as cur: row = cur.fetchone() @@ -376,6 +370,9 @@ def get_stored_state(self, pipeline_name: str) -> StateInfo: return None return StateInfo(row[0], row[1], row[2], row[3], pendulum.instance(row[4])) + def _norm_and_escape_columns(self, *columns: str): + return map(self.sql_client.escape_column_name, map(self.schema.naming.normalize_identifier, columns)) + # def get_stored_states(self, state_table: str) -> List[StateInfo]: # """Loads list of compressed states from destination storage, optionally filtered by pipeline name""" # query = f"SELECT {self.STATE_TABLE_COLUMNS} FROM {state_table} AS s ORDER BY created_at DESC" @@ -386,8 +383,9 @@ def get_stored_state(self, pipeline_name: str) -> StateInfo: # return result def get_stored_schema_by_hash(self, version_hash: str) -> StorageSchemaInfo: - name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - query = f"SELECT {self.version_table_schema_columns} FROM {name} WHERE version_hash = %s;" + table_name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) + c_version_hash, = self._norm_and_escape_columns("version_hash") + query = f"SELECT {self.version_table_schema_columns} FROM {table_name} WHERE {c_version_hash} = %s;" return self._row_to_schema_info(query, version_hash) def _execute_schema_update_sql(self, only_tables: Iterable[str]) -> TSchemaTables: @@ -526,16 +524,17 @@ def _row_to_schema_info(self, query: str, *args: Any) -> StorageSchemaInfo: pass # make utc datetime - inserted_at = pendulum.instance(row[4]) + inserted_at = pendulum.instance(row[2]) - return StorageSchemaInfo(row[0], row[1], row[2], row[3], inserted_at, schema_str) + return StorageSchemaInfo(row[4], row[3], row[0], row[1], inserted_at, schema_str) def _replace_schema_in_storage(self, schema: Schema) -> None: """ Save the given schema in storage and remove all previous versions with the same name """ name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - self.sql_client.execute_sql(f"DELETE FROM {name} WHERE schema_name = %s;", schema.name) + c_schema_name, = self._norm_and_escape_columns("schema_name") + self.sql_client.execute_sql(f"DELETE FROM {name} WHERE {c_schema_name} = %s;", schema.name) self._update_schema_in_storage(schema) def _update_schema_in_storage(self, schema: Schema) -> None: @@ -559,11 +558,11 @@ def _commit_schema_update(self, schema: Schema, schema_str: str) -> None: self.sql_client.execute_sql( f"INSERT INTO {name}({self.version_table_schema_columns}) VALUES (%s, %s, %s, %s, %s," " %s);", - schema.stored_version_hash, - schema.name, schema.version, schema.ENGINE_VERSION, now_ts, + schema.name, + schema.stored_version_hash, schema_str, ) diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 215bcf9fe5..f81576c8d3 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -415,7 +415,7 @@ def gen_merge_sql( ) ) - # delete from top table now that child tables have 
been prcessed + # delete from top table now that child tables have been processed sql.append( cls.gen_delete_from_sql( root_table_name, unique_column, delete_temp_table_name, unique_column From f4c504f6ed6c156caceaaefdc866f30638a016e9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:47:52 +0100 Subject: [PATCH 018/105] runs state sync tests for lower and upper case naming conventions --- tests/load/pipeline/test_restore_state.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 73c651688d..66fa8e8927 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -7,13 +7,13 @@ import dlt from dlt.common import pendulum from dlt.common.schema.schema import Schema, utils -from dlt.common.utils import custom_environ, uniq_id +from dlt.common.utils import uniq_id from dlt.common.exceptions import DestinationUndefinedEntity + from dlt.load import Load from dlt.pipeline.exceptions import SqlClientNotAvailable - from dlt.pipeline.pipeline import Pipeline -from dlt.pipeline.state_sync import STATE_TABLE_COLUMNS, load_state_from_destination, state_resource +from dlt.pipeline.state_sync import load_state_from_destination, state_resource from dlt.destinations.job_client_impl import SqlJobClientBase from tests.utils import TEST_STORAGE_ROOT @@ -72,7 +72,7 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - columns={ "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}, "_dlt_load_id": {"name": "_dlt_load_id", "data_type": "text", "nullable": False}, - **STATE_TABLE_COLUMNS, + **utils.pipeline_state_table()["columns"], } ) schema.update_table(schema.normalize_table_identifiers(resource.compute_table_schema())) @@ -179,9 +179,13 @@ def test_silently_skip_on_invalid_credentials( ids=lambda x: x.name, ) @pytest.mark.parametrize("use_single_dataset", [True, False]) +@pytest.mark.parametrize("naming_convention", ["sql_upper", "snake_case"]) def test_get_schemas_from_destination( - destination_config: DestinationTestConfiguration, use_single_dataset: bool + destination_config: DestinationTestConfiguration, use_single_dataset: bool, naming_convention: str ) -> None: + # use specific naming convention + os.environ["SCHEMA__NAMING"] = naming_convention + pipeline_name = "pipe_" + uniq_id() dataset_name = "state_test_" + uniq_id() @@ -260,7 +264,11 @@ def _make_dn_name(schema_name: str) -> str: destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name, ) -def test_restore_state_pipeline(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize("naming_convention", ["sql_upper"]) +def test_restore_state_pipeline(destination_config: DestinationTestConfiguration, naming_convention: str) -> None: + # use specific naming convention + os.environ["SCHEMA__NAMING"] = naming_convention + # enable restoring from destination os.environ["RESTORE_FROM_DESTINATION"] = "True" pipeline_name = "pipe_" + uniq_id() dataset_name = "state_test_" + uniq_id() @@ -580,10 +588,12 @@ def some_data(param: str) -> Any: # get all the states, notice version 4 twice (one from production, the other from local) try: with p.sql_client() as client: + # use sql_client to escape identifiers properly state_table = client.make_qualified_table_name(p.default_schema.state_table_name) - + c_version = 
client.escape_column_name(p.default_schema.naming.normalize_identifier("version")) + c_created_at = client.escape_column_name(p.default_schema.naming.normalize_identifier("created_at")) assert_query_data( - p, f"SELECT version FROM {state_table} ORDER BY created_at DESC", [5, 4, 4, 3, 2] + p, f"SELECT {c_version} FROM {state_table} ORDER BY {c_created_at} DESC", [5, 4, 4, 3, 2] ) except SqlClientNotAvailable: pytest.skip(f"destination {destination_config.destination} does not support sql client") From 874cc29edbb695184a0f81063cf52e20bda9f57f Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:48:59 +0100 Subject: [PATCH 019/105] fixes weaviate to use normalized identifiers in queries --- .../impl/weaviate/weaviate_client.py | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 2d23dc38f7..d00746151a 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -29,7 +29,7 @@ from dlt.common.time import ensure_pendulum_datetime from dlt.common.schema import Schema, TTableSchema, TSchemaTables, TTableSchemaColumns from dlt.common.schema.typing import TColumnSchema, TColumnType -from dlt.common.schema.utils import get_columns_names_with_prop +from dlt.common.schema.utils import get_columns_names_with_prop, pipeline_state_table from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync from dlt.common.data_types import TDataType @@ -232,17 +232,15 @@ class WeaviateClient(JobClientBase, WithStateSync): """Weaviate client implementation.""" capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - state_properties: ClassVar[List[str]] = [ - "version", - "engine_version", - "pipeline_name", - "state", - "created_at", - "_dlt_load_id", - ] def __init__(self, schema: Schema, config: WeaviateClientConfiguration) -> None: super().__init__(schema, config) + self.version_collection_properties = list(schema.get_table_columns(schema.version_table_name).keys()) + self.loads_collection_properties = list(schema.get_table_columns(schema.loads_table_name).keys()) + # get definition of state table (may not be present in the schema) + state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) + # column names are pipeline properties + self.pipeline_state_properties = list(state_table["columns"].keys()) self.config: WeaviateClientConfiguration = config self.db_client = self.create_db_client(config) @@ -482,6 +480,11 @@ def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns] def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: """Loads compressed state from destination storage""" + # normalize properties + p_load_id = self.schema.naming.normalize_identifier("load_id") + p_pipeline_name = self.schema.naming.normalize_identifier("pipeline_name") + p_created_at = self.schema.naming.normalize_identifier("created_at") + p_status = self.schema.naming.normalize_identifier("status") # we need to find a stored state that matches a load id that was completed # we retrieve the state in blocks of 10 for this @@ -490,15 +493,15 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: while True: state_records = self.get_records( self.schema.state_table_name, - sort={"path": 
["created_at"], "order": "desc"}, + sort={"path": [p_created_at], "order": "desc"}, where={ - "path": ["pipeline_name"], + "path": [p_pipeline_name], "operator": "Equal", "valueString": pipeline_name, }, limit=stepsize, offset=offset, - properties=self.state_properties, + properties=self.pipeline_state_properties, ) offset += stepsize if len(state_records) == 0: @@ -508,12 +511,12 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: load_records = self.get_records( self.schema.loads_table_name, where={ - "path": ["load_id"], + "path": [p_load_id], "operator": "Equal", "valueString": load_id, }, limit=1, - properties=["load_id", "status"], + properties=[p_load_id, p_status], ) # if there is a load for this state which was successful, return the state if len(load_records): @@ -533,12 +536,14 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: def get_stored_schema(self) -> Optional[StorageSchemaInfo]: """Retrieves newest schema from destination storage""" + p_schema_name = self.schema.naming.normalize_identifier("schema_name") + p_inserted_at = self.schema.naming.normalize_identifier("inserted_at") try: record = self.get_records( self.schema.version_table_name, - sort={"path": ["inserted_at"], "order": "desc"}, + sort={"path": [p_inserted_at], "order": "desc"}, where={ - "path": ["schema_name"], + "path": [p_schema_name], "operator": "Equal", "valueString": self.schema.name, }, @@ -549,11 +554,12 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: return None def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaInfo]: + p_version_hash = self.schema.naming.normalize_identifier("version_hash") try: record = self.get_records( self.schema.version_table_name, where={ - "path": ["version_hash"], + "path": [p_version_hash], "operator": "Equal", "valueString": schema_hash, }, @@ -660,12 +666,9 @@ def restore_file_load(self, file_path: str) -> LoadJob: @wrap_weaviate_error def complete_load(self, load_id: str) -> None: - properties = { - "load_id": load_id, - "schema_name": self.schema.name, - "status": 0, - "inserted_at": pendulum.now().isoformat(), - } + values = [load_id, self.schema.name, 0, pendulum.now().isoformat()] + assert len(values) == len(self.loads_collection_properties) + properties = {k:v for k,v in zip(self.loads_collection_properties, values)} self.create_object(properties, self.schema.loads_table_name) def __enter__(self) -> "WeaviateClient": @@ -681,14 +684,9 @@ def __exit__( def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) - properties = { - "version_hash": schema.stored_version_hash, - "schema_name": schema.name, - "version": schema.version, - "engine_version": schema.ENGINE_VERSION, - "inserted_at": pendulum.now().isoformat(), - "schema": schema_str, - } + values = [schema.stored_version_hash, schema.name, schema.version, schema.ENGINE_VERSION, str(pendulum.now()), schema_str] + assert len(values) == len(self.version_collection_properties) + properties = {k:v for k,v in zip(self.version_collection_properties, values)} self.create_object(properties, self.schema.version_table_name) def _from_db_type( From c4e9f3532cf03c9211f0ec3ad84d33b47df33f68 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:49:30 +0100 Subject: [PATCH 020/105] partially fixes qdrant incorrect state and version retrieval queries --- dlt/destinations/impl/qdrant/qdrant_client.py | 74 ++++++++++--------- 1 file changed, 41 insertions(+), 33 deletions(-) diff 
--git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index 2df3023d86..fcba49883c 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -1,12 +1,14 @@ + from types import TracebackType from typing import ClassVar, Optional, Sequence, List, Dict, Type, Iterable, Any, IO from dlt.common import json, pendulum, logger from dlt.common.schema import Schema, TTableSchema, TSchemaTables -from dlt.common.schema.utils import get_columns_names_with_prop +from dlt.common.schema.utils import get_columns_names_with_prop, pipeline_state_table from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync from dlt.common.storages import FileStorage +from dlt.common.time import precise_time from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo @@ -124,7 +126,7 @@ def _generate_uuid( collection_name (str): Qdrant collection name. Returns: - str: A string representation of the genrated UUID + str: A string representation of the generated UUID """ data_id = "_".join(str(data[key]) for key in unique_identifiers) return str(uuid.uuid5(uuid.NAMESPACE_DNS, collection_name + data_id)) @@ -140,17 +142,15 @@ class QdrantClient(JobClientBase, WithStateSync): """Qdrant Destination Handler""" capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - state_properties: ClassVar[List[str]] = [ - "version", - "engine_version", - "pipeline_name", - "state", - "created_at", - "_dlt_load_id", - ] def __init__(self, schema: Schema, config: QdrantClientConfiguration) -> None: super().__init__(schema, config) + self.version_collection_properties = list(schema.get_table_columns(schema.version_table_name).keys()) + self.loads_collection_properties = list(schema.get_table_columns(schema.loads_table_name).keys()) + # get definition of state table (may not be present in the schema) + state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) + # column names are pipeline properties + self.pipeline_state_properties = list(state_table["columns"].keys()) self.config: QdrantClientConfiguration = config self.db_client: QC = QdrantClient._create_db_client(config) self.model = config.model @@ -215,18 +215,21 @@ def _create_collection(self, full_collection_name: str) -> None: collection_name=full_collection_name, vectors_config=vectors_config ) - def _create_point(self, obj: Dict[str, Any], collection_name: str) -> None: + def _create_point_no_vector(self, obj: Dict[str, Any], collection_name: str) -> None: """Inserts a point into a Qdrant collection without a vector. Args: obj (Dict[str, Any]): The arbitrary data to be inserted as payload. collection_name (str): The name of the collection to insert the point into. 
""" + # we want decreased ids because the point scroll functions orders by id ASC + # so we want newest first + id_ = 2**64 - int(precise_time() * 10**6) self.db_client.upsert( collection_name, points=[ models.PointStruct( - id=str(uuid.uuid4()), + id=id_, payload=obj, vector={}, ) @@ -303,6 +306,15 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: """Loads compressed state from destination storage By finding a load id that was completed """ + # normalize property names + p_load_id = self.schema.naming.normalize_identifier("load_id") + p_pipeline_name = self.schema.naming.normalize_identifier("pipeline_name") + + # this works only because we create points that have no vectors + # with decreasing ids. so newest (lowest ids) go first + # TODO: this does not work because we look for state first and state has UUID4 + # TODO: look for 10 last load ids and find the state associated with them + limit = 10 offset = None while True: @@ -312,11 +324,11 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ) state_records, offset = self.db_client.scroll( scroll_table_name, - with_payload=self.state_properties, + with_payload=self.pipeline_state_properties, scroll_filter=models.Filter( must=[ models.FieldCondition( - key="pipeline_name", match=models.MatchValue(value=pipeline_name) + key=p_pipeline_name, match=models.MatchValue(value=pipeline_name) ) ] ), @@ -337,7 +349,7 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: count_filter=models.Filter( must=[ models.FieldCondition( - key="load_id", match=models.MatchValue(value=load_id) + key=p_load_id, match=models.MatchValue(value=load_id) ) ] ), @@ -352,13 +364,16 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: """Retrieves newest schema from destination storage""" try: scroll_table_name = self._make_qualified_collection_name(self.schema.version_table_name) + p_schema_name = self.schema.naming.normalize_identifier("schema_name") + # this works only because we create points that have no vectors + # with decreasing ids. 
so newest (lowest ids) go first response = self.db_client.scroll( scroll_table_name, with_payload=True, scroll_filter=models.Filter( must=[ models.FieldCondition( - key="schema_name", + key=p_schema_name, match=models.MatchValue(value=self.schema.name), ) ] @@ -373,13 +388,14 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaInfo]: try: scroll_table_name = self._make_qualified_collection_name(self.schema.version_table_name) + p_version_hash = self.schema.naming.normalize_identifier("version_hash") response = self.db_client.scroll( scroll_table_name, with_payload=True, scroll_filter=models.Filter( must=[ models.FieldCondition( - key="version_hash", match=models.MatchValue(value=schema_hash) + key=p_version_hash, match=models.MatchValue(value=schema_hash) ) ] ), @@ -403,14 +419,11 @@ def restore_file_load(self, file_path: str) -> LoadJob: return EmptyLoadJob.from_file_path(file_path, "completed") def complete_load(self, load_id: str) -> None: - properties = { - "load_id": load_id, - "schema_name": self.schema.name, - "status": 0, - "inserted_at": str(pendulum.now()), - } + values = [load_id, self.schema.name, 0, str(pendulum.now())] + assert len(values) == len(self.loads_collection_properties) + properties = {k:v for k,v in zip(self.loads_collection_properties, values)} loads_table_name = self._make_qualified_collection_name(self.schema.loads_table_name) - self._create_point(properties, loads_table_name) + self._create_point_no_vector(properties, loads_table_name) def __enter__(self) -> "QdrantClient": return self @@ -425,16 +438,11 @@ def __exit__( def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) - properties = { - "version_hash": schema.stored_version_hash, - "schema_name": schema.name, - "version": schema.version, - "engine_version": schema.ENGINE_VERSION, - "inserted_at": str(pendulum.now()), - "schema": schema_str, - } + values = [schema.stored_version_hash, schema.name, schema.version, schema.ENGINE_VERSION, str(pendulum.now()), schema_str] + assert len(values) == len(self.version_collection_properties) + properties = {k:v for k,v in zip(self.version_collection_properties, values)} version_table_name = self._make_qualified_collection_name(self.schema.version_table_name) - self._create_point(properties, version_table_name) + self._create_point_no_vector(properties, version_table_name) def _execute_schema_update(self, only_tables: Iterable[str]) -> None: for table_name in only_tables or self.schema.tables: From 63453778d03ae1c59abf97697dfe450f3009c900 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 25 Feb 2024 13:50:00 +0100 Subject: [PATCH 021/105] initial sql uppercase naming convention --- dlt/common/normalizers/naming/sql_upper.py | 20 ++++++++++++++++++++ tests/load/test_job_client.py | 7 ++++++- tests/load/utils.py | 2 ++ tests/pipeline/test_dlt_versions.py | 12 ++++++------ 4 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 dlt/common/normalizers/naming/sql_upper.py diff --git a/dlt/common/normalizers/naming/sql_upper.py b/dlt/common/normalizers/naming/sql_upper.py new file mode 100644 index 0000000000..992940d9a2 --- /dev/null +++ b/dlt/common/normalizers/naming/sql_upper.py @@ -0,0 +1,20 @@ +from typing import Any, Sequence + +from dlt.common.normalizers.naming.naming import NamingConvention as BaseNamingConvention + + +class NamingConvention(BaseNamingConvention): + PATH_SEPARATOR = "__" + + _CLEANUP_TABLE = 
str.maketrans(".\n\r'\"▶", "______") + + def normalize_identifier(self, identifier: str) -> str: + identifier = super().normalize_identifier(identifier) + norm_identifier = identifier.translate(self._CLEANUP_TABLE).upper() + return self.shorten_identifier(norm_identifier, identifier, self.max_length) + + def make_path(self, *identifiers: Any) -> str: + return self.PATH_SEPARATOR.join(filter(lambda x: x.strip(), identifiers)) + + def break_path(self, path: str) -> Sequence[str]: + return [ident for ident in path.split(self.PATH_SEPARATOR) if ident.strip()] diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 63f9d3c28d..3db2acb11d 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -29,7 +29,7 @@ from dlt.common.destination.reference import WithStagingDataset from tests.cases import table_update_and_row, assert_all_data_types_row -from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage +from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, preserve_environ from tests.common.utils import load_json_case from tests.load.utils import ( TABLE_UPDATE, @@ -45,6 +45,10 @@ from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +@pytest.fixture(autouse=True) +def set_environ(): + os.environ["SCHEMA__NAMING"] = "sql_upper" + @pytest.fixture def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) @@ -374,6 +378,7 @@ def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: # now get the actual schema from the db exists, storage_table = client.get_storage_table(table_name) assert exists is True + print(storage_table) # column order must match TABLE_UPDATE storage_columns = list(storage_table.values()) for c, expected_c in zip(TABLE_UPDATE, storage_columns): diff --git a/tests/load/utils.py b/tests/load/utils.py index 50dca88248..877b32fd2f 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -488,6 +488,8 @@ def yield_client( ) schema_storage = SchemaStorage(storage_config) schema = schema_storage.load_schema(schema_name) + schema.update_normalizers() + schema.bump_version() # create client and dataset client: SqlJobClientBase = None diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 8906958e0c..1fecc0eeaa 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -14,7 +14,7 @@ from dlt.common.storages import FileStorage from dlt.common.schema.typing import ( LOADS_TABLE_NAME, - STATE_TABLE_NAME, + PIPELINE_STATE_TABLE_NAME, VERSION_TABLE_NAME, TStoredSchema, ) @@ -66,7 +66,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: ) # check the dlt state table assert { - "version_hash" not in github_schema["tables"][STATE_TABLE_NAME]["columns"] + "version_hash" not in github_schema["tables"][PIPELINE_STATE_TABLE_NAME]["columns"] } # check loads table without attaching to pipeline duckdb_cfg = resolve_configuration( @@ -79,7 +79,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: assert len(rows[0]) == 4 rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 20 - rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + rows = client.execute_sql(f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME}") # only 5 columns + 2 dlt columns assert len(rows[0]) == 5 + 2 # inspect old state @@ -131,7 +131,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: # two schema versions 
rows = client.execute_sql(f"SELECT * FROM {VERSION_TABLE_NAME}") assert len(rows) == 2 - rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME} ORDER BY version") + rows = client.execute_sql(f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME} ORDER BY version") # we have hash columns assert len(rows[0]) == 6 + 2 assert len(rows) == 2 @@ -217,7 +217,7 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: assert pipeline.state["_version_hash"] is not None # but in db there's no hash - we loaded an old package with backward compatible schema with pipeline.sql_client() as client: - rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + rows = client.execute_sql(f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME}") # no hash assert len(rows[0]) == 5 + 2 assert len(rows) == 1 @@ -227,7 +227,7 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: # this will sync schema to destination pipeline.sync_schema() # we have hash now - rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + rows = client.execute_sql(f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME}") assert len(rows[0]) == 6 + 2 From aef8cc21becc15893f836c8497df1f587756adb2 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 9 Mar 2024 01:42:39 +0100 Subject: [PATCH 022/105] adds native df readers to databricks and bigquery --- dlt/destinations/impl/bigquery/sql_client.py | 37 ++++++++++++------ .../impl/databricks/sql_client.py | 38 +++++++++++++------ 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 95cb7ea73b..5fdbc12029 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import Any, AnyStr, ClassVar, Iterator, List, Optional, Sequence +from typing import Any, AnyStr, ClassVar, Iterator, List, Optional, Sequence, Generator import google.cloud.bigquery as bigquery # noqa: I250 from google.api_core import exceptions as api_core_exceptions @@ -8,6 +8,7 @@ from google.cloud.bigquery.dbapi import Connection as DbApiConnection, Cursor as BQDbApiCursor from google.cloud.bigquery.dbapi import exceptions as dbapi_exceptions +from dlt.common import logger from dlt.common.configuration.specs import GcpServiceAccountCredentialsWithoutDefaults from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.typing import StrAny @@ -44,17 +45,30 @@ class BigQueryDBApiCursorImpl(DBApiCursorImpl): """Use native BigQuery data frame support if available""" native_cursor: BQDbApiCursor # type: ignore + df_iterator: Generator[Any, None, None] - def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: - if chunk_size is not None: - return super().df(chunk_size=chunk_size) - query_job: bigquery.QueryJob = self.native_cursor._query_job + def __init__(self, curr: DBApiCursor) -> None: + super().__init__(curr) + self.df_iterator = None + def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: + query_job: bigquery.QueryJob = self.native_cursor.query_job + if self.df_iterator: + return next(self.df_iterator, None) try: + if chunk_size is not None: + # create iterator with given page size + self.df_iterator = query_job.result(page_size=chunk_size).to_dataframe_iterable() + return next(self.df_iterator, None) return query_job.to_dataframe(**kwargs) - except ValueError: + except ValueError as ex: # no pyarrow/db-types, fallback to our 
implementation - return super().df() + logger.warning(f"Native BigQuery pandas reader could not be used: {str(ex)}") + return super().df(chunk_size=chunk_size) + + def close(self) -> None: + if self.df_iterator: + self.df_iterator.close() class BigQuerySqlClient(SqlClientBase[bigquery.Client], DBTransaction): @@ -220,12 +234,11 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB conn.close() def fully_qualified_dataset_name(self, escape: bool = True) -> str: + project_id = self.capabilities.case_identifier(self.credentials.project_id) + dataset_name = self.capabilities.case_identifier(self.dataset_name) if escape: - project_id = self.capabilities.escape_identifier(self.credentials.project_id) - dataset_name = self.capabilities.escape_identifier(self.dataset_name) - else: - project_id = self.credentials.project_id - dataset_name = self.dataset_name + project_id = self.capabilities.escape_identifier(project_id) + dataset_name = self.capabilities.escape_identifier(dataset_name) return f"{project_id}.{dataset_name}" @classmethod diff --git a/dlt/destinations/impl/databricks/sql_client.py b/dlt/destinations/impl/databricks/sql_client.py index 68ea863cc4..30e8e7c867 100644 --- a/dlt/destinations/impl/databricks/sql_client.py +++ b/dlt/destinations/impl/databricks/sql_client.py @@ -8,8 +8,6 @@ ) from databricks.sql.exc import Error as DatabricksSqlError -from dlt.common import pendulum -from dlt.common import logger from dlt.common.destination import DestinationCapabilitiesContext from dlt.destinations.exceptions import ( DatabaseTerminalException, @@ -22,10 +20,26 @@ raise_database_error, raise_open_connection_error, ) -from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction +from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame from dlt.destinations.impl.databricks.configuration import DatabricksCredentials from dlt.destinations.impl.databricks import capabilities -from dlt.common.time import to_py_date, to_py_datetime + + +class DatabricksCursorImpl(DBApiCursorImpl): + """Use native data frame support if available""" + + native_cursor: DatabricksSqlCursor + vector_size: ClassVar[int] = 2048 + + def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: + if chunk_size is None: + return self.native_cursor.fetchall_arrow().to_pandas() + else: + df = self.native_cursor.fetchmany_arrow(chunk_size).to_pandas() + if df.shape[0] == 0: + return None + else: + return df class DatabricksSqlClient(SqlClientBase[DatabricksSqlConnection], DBTransaction): @@ -39,7 +53,9 @@ def __init__(self, dataset_name: str, credentials: DatabricksCredentials) -> Non def open_connection(self) -> DatabricksSqlConnection: conn_params = self.credentials.to_connector_params() - self._conn = databricks_lib.connect(**conn_params, schema=self.dataset_name) + self._conn = databricks_lib.connect( + **conn_params, schema=self.dataset_name, use_inline_params="silent" + ) return self._conn @raise_open_connection_error @@ -91,6 +107,7 @@ def execute_sql( def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DBApiCursor]: curr: DBApiCursor = None # TODO: databricks connector 3.0.0 will use :named paramstyle only + # NOTE: we were able to use the old style until they get deprecated # if args: # keys = [f"arg{i}" for i in range(len(args))] # # Replace position arguments (%s) with named arguments (:arg0, :arg1, ...) 
@@ -114,15 +131,14 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB db_args = None with self._conn.cursor() as curr: curr.execute(query, db_args) - yield DBApiCursorImpl(curr) # type: ignore[abstract] + yield DatabricksCursorImpl(curr) # type: ignore[abstract] def fully_qualified_dataset_name(self, escape: bool = True) -> str: + catalog = self.capabilities.case_identifier(self.credentials.catalog) + dataset_name = self.capabilities.case_identifier(self.dataset_name) if escape: - catalog = self.capabilities.escape_identifier(self.credentials.catalog) - dataset_name = self.capabilities.escape_identifier(self.dataset_name) - else: - catalog = self.credentials.catalog - dataset_name = self.dataset_name + catalog = self.capabilities.escape_identifier(catalog) + dataset_name = self.capabilities.escape_identifier(dataset_name) return f"{catalog}.{dataset_name}" @staticmethod From a53c00b49f1f6766921565a2f932f4a301ce7071 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 9 Mar 2024 01:44:41 +0100 Subject: [PATCH 023/105] adds casing identifier capability to support different casing in naming conventions, fixes how identifiers are normalized in destinations --- dlt/common/destination/capabilities.py | 3 +++ tests/load/pipeline/test_restore_state.py | 20 +++++++++++++++----- tests/load/test_job_client.py | 7 ++++--- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index a78a31fdf3..b3bc8e5109 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -36,6 +36,8 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): supported_staging_file_formats: List[TLoaderFileFormat] escape_identifier: Callable[[str], str] escape_literal: Callable[[Any], Any] + case_identifier: Callable[[str], str] = identity + """Controls identifier casing on top of naming convention. 
Used to generate case insensitive casing.""" decimal_precision: Tuple[int, int] wei_precision: Tuple[int, int] max_identifier_length: int @@ -71,6 +73,7 @@ def generic_capabilities( caps.supported_staging_file_formats = [] caps.escape_identifier = identity caps.escape_literal = serialize_value + caps.case_identifier = identity caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (EVM_DECIMAL_PRECISION, 0) caps.max_identifier_length = 65536 diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 8c334756db..6d323c94c5 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -181,7 +181,9 @@ def test_silently_skip_on_invalid_credentials( @pytest.mark.parametrize("use_single_dataset", [True, False]) @pytest.mark.parametrize("naming_convention", ["sql_upper", "snake_case"]) def test_get_schemas_from_destination( - destination_config: DestinationTestConfiguration, use_single_dataset: bool, naming_convention: str + destination_config: DestinationTestConfiguration, + use_single_dataset: bool, + naming_convention: str, ) -> None: # use specific naming convention os.environ["SCHEMA__NAMING"] = naming_convention @@ -265,7 +267,9 @@ def _make_dn_name(schema_name: str) -> str: ids=lambda x: x.name, ) @pytest.mark.parametrize("naming_convention", ["sql_upper"]) -def test_restore_state_pipeline(destination_config: DestinationTestConfiguration, naming_convention: str) -> None: +def test_restore_state_pipeline( + destination_config: DestinationTestConfiguration, naming_convention: str +) -> None: # use specific naming convention os.environ["SCHEMA__NAMING"] = naming_convention # enable restoring from destination @@ -602,10 +606,16 @@ def some_data(param: str) -> Any: with p.sql_client() as client: # use sql_client to escape identifiers properly state_table = client.make_qualified_table_name(p.default_schema.state_table_name) - c_version = client.escape_column_name(p.default_schema.naming.normalize_identifier("version")) - c_created_at = client.escape_column_name(p.default_schema.naming.normalize_identifier("created_at")) + c_version = client.escape_column_name( + p.default_schema.naming.normalize_identifier("version") + ) + c_created_at = client.escape_column_name( + p.default_schema.naming.normalize_identifier("created_at") + ) assert_query_data( - p, f"SELECT {c_version} FROM {state_table} ORDER BY {c_created_at} DESC", [5, 4, 4, 3, 2] + p, + f"SELECT {c_version} FROM {state_table} ORDER BY {c_created_at} DESC", + [5, 4, 4, 3, 2], ) except SqlClientNotAvailable: pytest.skip(f"destination {destination_config.destination} does not support sql client") diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 3db2acb11d..91f177b6f6 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -45,9 +45,10 @@ from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -@pytest.fixture(autouse=True) -def set_environ(): - os.environ["SCHEMA__NAMING"] = "sql_upper" +# @pytest.fixture(autouse=True) +# def set_environ(): +# os.environ["SCHEMA__NAMING"] = "sql_upper" + @pytest.fixture def file_storage() -> FileStorage: From 91f57802d788dfb424dc910104e03bb216aef9ad Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 9 Mar 2024 01:44:58 +0100 Subject: [PATCH 024/105] cleans typing for relational normalizer --- dlt/common/data_writers/escape.py | 2 +- dlt/common/normalizers/json/relational.py | 67 
++++++++++------ dlt/common/schema/schema.py | 30 +++++-- dlt/common/schema/utils.py | 8 +- dlt/destinations/impl/athena/__init__.py | 1 + dlt/destinations/impl/athena/athena.py | 6 +- dlt/destinations/impl/bigquery/bigquery.py | 12 +-- .../impl/databricks/databricks.py | 13 ++-- dlt/destinations/impl/duckdb/duck.py | 2 +- dlt/destinations/impl/duckdb/sql_client.py | 7 +- .../impl/motherduck/sql_client.py | 12 +-- dlt/destinations/impl/mssql/mssql.py | 2 +- dlt/destinations/impl/mssql/sql_client.py | 9 +-- dlt/destinations/impl/postgres/__init__.py | 1 + dlt/destinations/impl/postgres/postgres.py | 2 +- dlt/destinations/impl/postgres/sql_client.py | 5 -- dlt/destinations/impl/qdrant/qdrant_client.py | 26 +++++-- dlt/destinations/impl/redshift/__init__.py | 1 + dlt/destinations/impl/redshift/redshift.py | 2 +- dlt/destinations/impl/snowflake/__init__.py | 2 +- dlt/destinations/impl/snowflake/snowflake.py | 8 +- dlt/destinations/impl/snowflake/sql_client.py | 6 -- .../impl/weaviate/weaviate_client.py | 25 ++++-- dlt/destinations/job_client_impl.py | 52 ++++++++----- dlt/destinations/sql_client.py | 13 +++- dlt/extract/source.py | 11 ++- dlt/pipeline/state_sync.py | 6 +- .../normalizers/test_json_relational.py | 78 ++++++++++--------- tests/pipeline/test_dlt_versions.py | 7 +- 29 files changed, 241 insertions(+), 175 deletions(-) diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index 5460657253..38ac304f59 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -129,7 +129,7 @@ def escape_bigquery_identifier(v: str) -> str: def escape_snowflake_identifier(v: str) -> str: # Snowcase uppercase all identifiers unless quoted. Match this here so queries on information schema work without issue # See also https://docs.snowflake.com/en/sql-reference/identifiers-syntax#double-quoted-identifiers - return escape_postgres_identifier(v.upper()) + return escape_postgres_identifier(v) escape_databricks_identifier = escape_bigquery_identifier diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index c55ce888ba..2297e6404f 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -3,7 +3,7 @@ from dlt.common.normalizers.typing import TJSONNormalizer from dlt.common.normalizers.utils import generate_dlt_id, DLT_ID_LENGTH_BYTES -from dlt.common.typing import DictStrAny, DictStrStr, TDataItem, StrAny +from dlt.common.typing import DictStrAny, TDataItem, StrAny from dlt.common.schema import Schema from dlt.common.schema.typing import TColumnSchema, TColumnName, TSimpleRegex from dlt.common.schema.utils import column_name_validator @@ -17,8 +17,8 @@ class RelationalNormalizerConfigPropagation(TypedDict, total=False): - root: Optional[Mapping[TColumnName, TColumnName]] - tables: Optional[Mapping[str, Mapping[TColumnName, TColumnName]]] + root: Optional[Dict[TColumnName, TColumnName]] + tables: Optional[Dict[str, Dict[TColumnName, TColumnName]]] class RelationalNormalizerConfig(TypedDict, total=False): @@ -28,7 +28,6 @@ class RelationalNormalizerConfig(TypedDict, total=False): class DataItemNormalizer(DataItemNormalizerBase[RelationalNormalizerConfig]): - # known normalizer props C_DLT_ID = "_dlt_id" """unique id of current row""" @@ -46,7 +45,6 @@ class DataItemNormalizer(DataItemNormalizerBase[RelationalNormalizerConfig]): # other constants EMPTY_KEY_IDENTIFIER = "_empty" # replace empty keys with this - normalizer_config: RelationalNormalizerConfig 
propagation_config: RelationalNormalizerConfigPropagation max_nesting: int @@ -61,12 +59,20 @@ def __init__(self, schema: Schema) -> None: def _reset(self) -> None: # normalize known normalizer column identifiers - self.c_dlt_id = self.naming.normalize_identifier(self.C_DLT_ID) - self.c_dlt_load_id = self.naming.normalize_identifier(self.C_DLT_LOAD_ID) - self.c_dlt_root_id = self.naming.normalize_identifier(self.C_DLT_ROOT_ID) - self.c_dlt_parent_id = self.naming.normalize_identifier(self.C_DLT_PARENT_ID) - self.c_dlt_list_idx = self.naming.normalize_identifier(self.C_DLT_LIST_IDX) - self.c_value = self.naming.normalize_identifier(self.C_VALUE) + self.c_dlt_id: TColumnName = TColumnName(self.naming.normalize_identifier(self.C_DLT_ID)) + self.c_dlt_load_id: TColumnName = TColumnName( + self.naming.normalize_identifier(self.C_DLT_LOAD_ID) + ) + self.c_dlt_root_id: TColumnName = TColumnName( + self.naming.normalize_identifier(self.C_DLT_ROOT_ID) + ) + self.c_dlt_parent_id: TColumnName = TColumnName( + self.naming.normalize_identifier(self.C_DLT_PARENT_ID) + ) + self.c_dlt_list_idx: TColumnName = TColumnName( + self.naming.normalize_identifier(self.C_DLT_LIST_IDX) + ) + self.c_value: TColumnName = TColumnName(self.naming.normalize_identifier(self.C_VALUE)) # normalize config @@ -136,7 +142,7 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] = ()) - out_rec_row[child_name] = v norm_row_dicts(dict_row, _r_lvl) - return cast(DictStrAny, out_rec_row), out_rec_list + return out_rec_row, out_rec_list @staticmethod def _get_child_row_hash(parent_row_id: str, child_table: str, list_idx: int) -> str: @@ -154,7 +160,7 @@ def _link_row(row: DictStrAny, parent_row_id: str, list_idx: int) -> DictStrAny: @staticmethod def _extend_row(extend: DictStrAny, row: DictStrAny) -> None: - row.update(extend) # type: ignore + row.update(extend) def _add_row_id( self, table: str, row: DictStrAny, parent_row_id: str, pos: int, _r_lvl: int @@ -167,7 +173,7 @@ def _add_row_id( # child table row deterministic hash row_id = DataItemNormalizer._get_child_row_hash(parent_row_id, table, pos) # link to parent table - DataItemNormalizer._link_row(cast(DictStrAny, row), parent_row_id, pos) + DataItemNormalizer._link_row(row, parent_row_id, pos) row[self.c_dlt_id] = row_id return row_id @@ -177,7 +183,7 @@ def _get_propagated_values(self, table: str, row: DictStrAny, _r_lvl: int) -> St config = self.propagation_config if config: # mapping(k:v): propagate property with name "k" as property with name "v" in child table - mappings: DictStrStr = {} + mappings: Dict[TColumnName, TColumnName] = {} if _r_lvl == 0: mappings.update(config.get("root") or {}) if table in (config.get("tables") or {}): @@ -185,7 +191,7 @@ def _get_propagated_values(self, table: str, row: DictStrAny, _r_lvl: int) -> St # look for keys and create propagation as values for prop_from, prop_as in mappings.items(): if prop_from in row: - extend[prop_as] = row[prop_from] # type: ignore + extend[prop_as] = row[prop_from] return extend @@ -283,7 +289,7 @@ def extend_schema(self) -> None: "root_key": [TSimpleRegex(self.c_dlt_root_id)], "unique": [TSimpleRegex(self.c_dlt_id)], }, - normalize_identifiers=False # already normalized + normalize_identifiers=False, # already normalized ) for table_name in self.schema.tables.keys(): @@ -292,13 +298,21 @@ def extend_schema(self) -> None: def extend_table(self, table_name: str) -> None: """If the table has a merge write disposition, add propagation info to normalizer - Table name should be normalized. 
+ Table name should be normalized. """ table = self.schema.tables.get(table_name) if not table.get("parent") and table.get("write_disposition") == "merge": DataItemNormalizer.update_normalizer_config( self.schema, - {"propagation": {"tables": {table_name: {self.c_dlt_id: TColumnName(self.c_dlt_root_id)}}}}, + { + "propagation": { + "tables": { + table_name: { + TColumnName(self.c_dlt_id): TColumnName(self.c_dlt_root_id) + } + } + } + }, ) def normalize_data_item( @@ -344,8 +358,15 @@ def get_normalizer_config(cls, schema: Schema) -> RelationalNormalizerConfig: def _validate_normalizer_config(schema: Schema, config: RelationalNormalizerConfig) -> None: """Normalizes all known column identifiers according to the schema and then validates the configuration""" - def _normalize_prop(mapping: Mapping[TColumnName, TColumnName]) -> Mapping[TColumnName, TColumnName]: - return {schema.naming.normalize_identifier(from_col): schema.naming.normalize_identifier(to_col) for from_col, to_col in mapping.items()} + def _normalize_prop( + mapping: Mapping[TColumnName, TColumnName] + ) -> Dict[TColumnName, TColumnName]: + return { + TColumnName(schema.naming.normalize_path(from_col)): TColumnName( + schema.naming.normalize_path(to_col) + ) + for from_col, to_col in mapping.items() + } # normalize the identifiers first propagation_config = config.get("propagation") @@ -354,7 +375,9 @@ def _normalize_prop(mapping: Mapping[TColumnName, TColumnName]) -> Mapping[TColu propagation_config["root"] = _normalize_prop(propagation_config["root"]) if "tables" in propagation_config: for table_name in propagation_config["tables"]: - propagation_config["tables"][table_name] = _normalize_prop(propagation_config["tables"][table_name]) + propagation_config["tables"][table_name] = _normalize_prop( + propagation_config["tables"][table_name] + ) validate_dict( RelationalNormalizerConfig, diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index df86403abf..4a5040dbe3 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -404,7 +404,9 @@ def resolve_contract_settings_for_table( # expand settings, empty settings will expand into default settings return Schema.expand_schema_contract_settings(settings) - def update_table(self, partial_table: TPartialTableSchema, normalize_identifiers: bool = True) -> TPartialTableSchema: + def update_table( + self, partial_table: TPartialTableSchema, normalize_identifiers: bool = True + ) -> TPartialTableSchema: """Adds or merges `partial_table` into the schema. Identifiers are normalized by default""" if normalize_identifiers: partial_table = self.normalize_table_identifiers(partial_table) @@ -438,7 +440,7 @@ def update_schema(self, schema: "Schema") -> None: self._settings = deepcopy(schema.settings) self._configure_normalizers(schema._normalizers_config) self._compile_settings() - # update all tables + # update all tables for table in schema.tables.values(): self.update_table(table) @@ -473,7 +475,11 @@ def filter_row_with_hint(self, table_name: str, hint_type: TColumnHint, row: Str # dicts are ordered and we will return the rows with hints in the same order as they appear in the columns return rv_row - def merge_hints(self, new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]], normalize_identifiers: bool = True) -> None: + def merge_hints( + self, + new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]], + normalize_identifiers: bool = True, + ) -> None: """Merges existing default hints with `new_hint`. 
Normalizes names in column regexes if possible""" if normalize_identifiers: new_hints = self._normalize_default_hints(new_hints) @@ -820,13 +826,23 @@ def _add_standard_hints(self) -> None: if type_detections: self._settings["detections"] = type_detections - def _normalize_default_hints(self, default_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]]) -> Mapping[TColumnHint, Sequence[TSimpleRegex]]: + def _normalize_default_hints( + self, default_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]] + ) -> Dict[TColumnHint, List[TSimpleRegex]]: """Normalizes the column names in default hints. In case of column names that are regexes, normalization is skipped""" - return {hint: [utils.normalize_simple_regex_column(self.naming, regex) for regex in regexes] for hint, regexes in default_hints.items()} + return { + hint: [utils.normalize_simple_regex_column(self.naming, regex) for regex in regexes] + for hint, regexes in default_hints.items() + } - def _normalize_preferred_types(self, preferred_types: Dict[TSimpleRegex, TDataType]) -> Dict[TSimpleRegex, TDataType]: + def _normalize_preferred_types( + self, preferred_types: Dict[TSimpleRegex, TDataType] + ) -> Dict[TSimpleRegex, TDataType]: """Normalizes the column names in preferred types mapping. In case of column names that are regexes, normalization is skipped""" - return {utils.normalize_simple_regex_column(self.naming, regex): data_type for regex, data_type in preferred_types.items()} + return { + utils.normalize_simple_regex_column(self.naming, regex): data_type + for regex, data_type in preferred_types.items() + } def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: # import desired modules diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 7465b49454..369f17b3aa 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -230,15 +230,15 @@ def _normalize(r_: str) -> str: r_ = r_[1:-1] # if this a simple string then normalize it if r_ == re.escape(r_): - r_ = naming.normalize_identifier(r_) + r_ = naming.normalize_path(r_) if is_exact: r_ = "^" + r_ + "$" return r_ if regex.startswith(SIMPLE_REGEX_PREFIX): - return SIMPLE_REGEX_PREFIX + _normalize(regex[3:]) + return cast(TSimpleRegex, SIMPLE_REGEX_PREFIX + _normalize(regex[3:])) else: - return _normalize(regex) + return cast(TSimpleRegex, _normalize(regex)) def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: @@ -692,7 +692,7 @@ def pipeline_state_table() -> TTableSchema: # set to nullable so we can migrate existing tables table = new_table( PIPELINE_STATE_TABLE_NAME, - columns = [ + columns=[ {"name": "version", "data_type": "bigint", "nullable": False}, {"name": "engine_version", "data_type": "bigint", "nullable": False}, {"name": "pipeline_name", "data_type": "text", "nullable": False}, diff --git a/dlt/destinations/impl/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py index fafa98c710..0968851918 100644 --- a/dlt/destinations/impl/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -11,6 +11,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["parquet", "jsonl"] caps.escape_identifier = escape_athena_identifier + caps.case_identifier = str.lower caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/athena/athena.py 
b/dlt/destinations/impl/athena/athena.py index 9d79d8bf55..efe8ac3ca4 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -204,6 +204,7 @@ def escape_ddl_identifier(self, v: str) -> str: # Athena uses HIVE to create tables but for querying it uses PRESTO (so normal escaping) if not v: return v + v = self.capabilities.case_identifier(v) # bigquery uses hive escaping return escape_bigquery_identifier(v) @@ -221,11 +222,6 @@ def create_dataset(self) -> None: def drop_dataset(self) -> None: self.execute_sql(f"DROP DATABASE {self.fully_qualified_ddl_dataset_name()} CASCADE;") - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return ( - self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - ) - def drop_tables(self, *tables: str) -> None: if not tables: return diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index d4261a1636..29a1854789 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -256,15 +256,15 @@ def _get_table_update_sql( c for c in new_columns if c.get("partition") or c.get(PARTITION_HINT, False) ]: if len(partition_list) > 1: - col_names = [self.capabilities.escape_identifier(c["name"]) for c in partition_list] + col_names = [self.sql_client.escape_column_name(c["name"]) for c in partition_list] raise DestinationSchemaWillNotUpdate( canonical_name, col_names, "Partition requested for more than one column" ) elif (c := partition_list[0])["data_type"] == "date": - sql[0] += f"\nPARTITION BY {self.capabilities.escape_identifier(c['name'])}" + sql[0] += f"\nPARTITION BY {self.sql_client.escape_column_name(c['name'])}" elif (c := partition_list[0])["data_type"] == "timestamp": sql[0] = ( - f"{sql[0]}\nPARTITION BY DATE({self.capabilities.escape_identifier(c['name'])})" + f"{sql[0]}\nPARTITION BY DATE({self.sql_client.escape_column_name(c['name'])})" ) # Automatic partitioning of an INT64 type requires us to be prescriptive - we treat the column as a UNIX timestamp. # This is due to the bounds requirement of GENERATE_ARRAY function for partitioning. 
@@ -273,12 +273,12 @@ def _get_table_update_sql( # See: https://dlthub.com/devel/dlt-ecosystem/destinations/bigquery#supported-column-hints elif (c := partition_list[0])["data_type"] == "bigint": sql[0] += ( - f"\nPARTITION BY RANGE_BUCKET({self.capabilities.escape_identifier(c['name'])}," + f"\nPARTITION BY RANGE_BUCKET({self.sql_client.escape_column_name(c['name'])}," " GENERATE_ARRAY(-172800000, 691200000, 86400))" ) if cluster_list := [ - self.capabilities.escape_identifier(c["name"]) + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("cluster") or c.get(CLUSTER_HINT, False) ]: @@ -327,7 +327,7 @@ def prepare_load_table( return table def _get_column_def_sql(self, column: TColumnSchema, table_format: TTableFormat = None) -> str: - name = self.capabilities.escape_identifier(column["name"]) + name = self.sql_client.escape_column_name(column["name"]) column_def_sql = ( f"{name} {self.type_mapper.to_db_type(column, table_format)} {self._gen_not_null(column.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/databricks/databricks.py b/dlt/destinations/impl/databricks/databricks.py index 07e827cd28..53b684eb18 100644 --- a/dlt/destinations/impl/databricks/databricks.py +++ b/dlt/destinations/impl/databricks/databricks.py @@ -1,6 +1,7 @@ from typing import ClassVar, Dict, Optional, Sequence, Tuple, List, Any, Iterable, Type, cast from urllib.parse import urlparse, urlunparse +from dlt import config from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( FollowupJob, @@ -15,26 +16,22 @@ AzureCredentials, AzureCredentialsWithoutDefaults, ) -from dlt.common.data_types import TDataType from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns from dlt.common.schema.typing import TTableSchema, TColumnType, TSchemaTables, TTableFormat from dlt.common.schema.utils import table_schema_has_type +from dlt.common.storages import FilesystemConfiguration, fsspec_from_config from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.exceptions import LoadJobTerminalException - from dlt.destinations.impl.databricks import capabilities from dlt.destinations.impl.databricks.configuration import DatabricksClientConfiguration from dlt.destinations.impl.databricks.sql_client import DatabricksSqlClient -from dlt.destinations.sql_jobs import SqlMergeJob, SqlJobParams +from dlt.destinations.sql_jobs import SqlMergeJob from dlt.destinations.job_impl import NewReferenceJob -from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper -from dlt.common.storages import FilesystemConfiguration, fsspec_from_config -from dlt import config class DatabricksTypeMapper(TypeMapper): @@ -298,7 +295,7 @@ def _get_table_update_sql( sql = super()._get_table_update_sql(table_name, new_columns, generate_alter) cluster_list = [ - self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("cluster") + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("cluster") ] if cluster_list: @@ -312,7 +309,7 @@ def _from_db_type( return self.type_mapper.from_db_type(bq_t, precision, scale) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - name = self.capabilities.escape_identifier(c["name"]) + name = self.sql_client.escape_column_name(c["name"]) return ( f"{name} 
{self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/duckdb/duck.py b/dlt/destinations/impl/duckdb/duck.py index 735a4ce7e3..f60f04d459 100644 --- a/dlt/destinations/impl/duckdb/duck.py +++ b/dlt/destinations/impl/duckdb/duck.py @@ -168,7 +168,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non for h in self.active_hints.keys() if c.get(h, False) is True ) - column_name = self.capabilities.escape_identifier(c["name"]) + column_name = self.sql_client.escape_column_name(c["name"]) return ( f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py index 2863d4943e..3aab890773 100644 --- a/dlt/destinations/impl/duckdb/sql_client.py +++ b/dlt/destinations/impl/duckdb/sql_client.py @@ -22,7 +22,7 @@ class DuckDBDBApiCursorImpl(DBApiCursorImpl): - """Use native BigQuery data frame support if available""" + """Use native duckdb data frame support if available""" native_cursor: duckdb.DuckDBPyConnection # type: ignore vector_size: ClassVar[int] = 2048 @@ -142,11 +142,6 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB # else: # return None - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return ( - self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - ) - @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: if isinstance(ex, (duckdb.CatalogException)): diff --git a/dlt/destinations/impl/motherduck/sql_client.py b/dlt/destinations/impl/motherduck/sql_client.py index 7990f90947..c6c86c33cc 100644 --- a/dlt/destinations/impl/motherduck/sql_client.py +++ b/dlt/destinations/impl/motherduck/sql_client.py @@ -30,12 +30,8 @@ def __init__(self, dataset_name: str, credentials: MotherDuckCredentials) -> Non self.database_name = credentials.database def fully_qualified_dataset_name(self, escape: bool = True) -> str: - database_name = ( - self.capabilities.escape_identifier(self.database_name) - if escape - else self.database_name - ) - dataset_name = ( - self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - ) + dataset_name = super().fully_qualified_dataset_name(escape) + database_name = self.capabilities.case_identifier(self.database_name) + if escape: + database_name = self.capabilities.escape_identifier(database_name) return f"{database_name}.{dataset_name}" diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index b6af345e36..6165cb2efe 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -175,7 +175,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non for h in self.active_hints.keys() if c.get(h, False) is True ) - column_name = self.capabilities.escape_identifier(c["name"]) + column_name = self.sql_client.escape_column_name(c["name"]) return f"{column_name} {db_type} {hints_str} {self._gen_not_null(c['nullable'])}" def _create_replace_followup_jobs( diff --git a/dlt/destinations/impl/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py index cd1699adea..459fdb7bc8 100644 --- a/dlt/destinations/impl/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -95,14 +95,14 @@ def drop_dataset(self) -> None: # Drop all views rows = self.execute_sql( "SELECT table_name FROM 
information_schema.views WHERE table_schema = %s;", - self.dataset_name, + self.capabilities.case_identifier(self.dataset_name), ) view_names = [row[0] for row in rows] self._drop_views(*view_names) # Drop all tables rows = self.execute_sql( "SELECT table_name FROM information_schema.tables WHERE table_schema = %s;", - self.dataset_name, + self.capabilities.case_identifier(self.dataset_name), ) table_names = [row[0] for row in rows] self.drop_tables(*table_names) @@ -149,11 +149,6 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB except pyodbc.Error as outer: raise outer - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return ( - self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - ) - @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: if isinstance(ex, pyodbc.ProgrammingError): diff --git a/dlt/destinations/impl/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py index 43e6af1996..1dca5e9774 100644 --- a/dlt/destinations/impl/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -14,6 +14,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = [] caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_postgres_literal + caps.case_identifier = str.lower caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (2 * EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) caps.max_identifier_length = 63 diff --git a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index f8fa3e341a..d93f387e28 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -121,7 +121,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non for h in self.active_hints.keys() if c.get(h, False) is True ) - column_name = self.capabilities.escape_identifier(c["name"]) + column_name = self.sql_client.escape_column_name(c["name"]) return ( f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/postgres/sql_client.py b/dlt/destinations/impl/postgres/sql_client.py index 366ed243ef..a012780f08 100644 --- a/dlt/destinations/impl/postgres/sql_client.py +++ b/dlt/destinations/impl/postgres/sql_client.py @@ -112,11 +112,6 @@ def execute_fragments( composed = Composed(sql if isinstance(sql, Composable) else SQL(sql) for sql in fragments) return self.execute_sql(composed, *args, **kwargs) - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return ( - self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - ) - def _reset_connection(self) -> None: # self._conn.autocommit = True self._conn.reset() diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index fcba49883c..89176400ba 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -1,4 +1,3 @@ - from types import TracebackType from typing import ClassVar, Optional, Sequence, List, Dict, Type, Iterable, Any, IO @@ -145,10 +144,16 @@ class QdrantClient(JobClientBase, WithStateSync): def __init__(self, schema: Schema, config: QdrantClientConfiguration) -> None: super().__init__(schema, config) - self.version_collection_properties = 
list(schema.get_table_columns(schema.version_table_name).keys()) - self.loads_collection_properties = list(schema.get_table_columns(schema.loads_table_name).keys()) + self.version_collection_properties = list( + schema.get_table_columns(schema.version_table_name).keys() + ) + self.loads_collection_properties = list( + schema.get_table_columns(schema.loads_table_name).keys() + ) # get definition of state table (may not be present in the schema) - state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) + state_table = schema.tables.get( + schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + ) # column names are pipeline properties self.pipeline_state_properties = list(state_table["columns"].keys()) self.config: QdrantClientConfiguration = config @@ -421,7 +426,7 @@ def restore_file_load(self, file_path: str) -> LoadJob: def complete_load(self, load_id: str) -> None: values = [load_id, self.schema.name, 0, str(pendulum.now())] assert len(values) == len(self.loads_collection_properties) - properties = {k:v for k,v in zip(self.loads_collection_properties, values)} + properties = {k: v for k, v in zip(self.loads_collection_properties, values)} loads_table_name = self._make_qualified_collection_name(self.schema.loads_table_name) self._create_point_no_vector(properties, loads_table_name) @@ -438,9 +443,16 @@ def __exit__( def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) - values = [schema.stored_version_hash, schema.name, schema.version, schema.ENGINE_VERSION, str(pendulum.now()), schema_str] + values = [ + schema.stored_version_hash, + schema.name, + schema.version, + schema.ENGINE_VERSION, + str(pendulum.now()), + schema_str, + ] assert len(values) == len(self.version_collection_properties) - properties = {k:v for k,v in zip(self.version_collection_properties, values)} + properties = {k: v for k, v in zip(self.version_collection_properties, values)} version_table_name = self._make_qualified_collection_name(self.schema.version_table_name) self._create_point_no_vector(properties, version_table_name) diff --git a/dlt/destinations/impl/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py index 8a8cae84b4..74c1adad4d 100644 --- a/dlt/destinations/impl/redshift/__init__.py +++ b/dlt/destinations/impl/redshift/__init__.py @@ -11,6 +11,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_redshift_identifier caps.escape_literal = escape_redshift_literal + caps.case_identifier = str.lower caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 127 diff --git a/dlt/destinations/impl/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py index 3426d96690..cc5e86cffa 100644 --- a/dlt/destinations/impl/redshift/redshift.py +++ b/dlt/destinations/impl/redshift/redshift.py @@ -244,7 +244,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non for h in HINT_TO_REDSHIFT_ATTR.keys() if c.get(h, False) is True ) - column_name = self.capabilities.escape_identifier(c["name"]) + column_name = self.sql_client.escape_column_name(c["name"]) return ( f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/snowflake/__init__.py 
b/dlt/destinations/impl/snowflake/__init__.py index dde4d5a382..2bc6cec485 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -1,4 +1,3 @@ -from dlt.common.data_writers.escape import escape_bigquery_identifier from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.data_writers.escape import escape_snowflake_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE @@ -11,6 +10,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "jsonl" caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_snowflake_identifier + caps.case_identifier = str.upper caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index 6f454f070a..e86b0adbfb 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -241,7 +241,7 @@ def _get_table_update_sql( sql = super()._get_table_update_sql(table_name, new_columns, generate_alter) cluster_list = [ - self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("cluster") + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("cluster") ] if cluster_list: @@ -255,11 +255,7 @@ def _from_db_type( return self.type_mapper.from_db_type(bq_t, precision, scale) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - name = self.capabilities.escape_identifier(c["name"]) + name = self.sql_client.escape_column_name(c["name"]) return ( f"{name} {self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" ) - - def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - table_name = table_name.upper() # All snowflake tables are uppercased in information schema - return super().get_storage_table(table_name) diff --git a/dlt/destinations/impl/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py index ba932277df..b95cf99c17 100644 --- a/dlt/destinations/impl/snowflake/sql_client.py +++ b/dlt/destinations/impl/snowflake/sql_client.py @@ -112,12 +112,6 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB self.open_connection() raise outer - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - # Always escape for uppercase - if escape: - return self.capabilities.escape_identifier(self.dataset_name) - return self.dataset_name.upper() - def _reset_connection(self) -> None: self._conn.rollback() self._conn.autocommit(True) diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 53995fbf57..137d765604 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -235,10 +235,16 @@ class WeaviateClient(JobClientBase, WithStateSync): def __init__(self, schema: Schema, config: WeaviateClientConfiguration) -> None: super().__init__(schema, config) - self.version_collection_properties = list(schema.get_table_columns(schema.version_table_name).keys()) - self.loads_collection_properties = list(schema.get_table_columns(schema.loads_table_name).keys()) + self.version_collection_properties = list( + 
schema.get_table_columns(schema.version_table_name).keys() + ) + self.loads_collection_properties = list( + schema.get_table_columns(schema.loads_table_name).keys() + ) # get definition of state table (may not be present in the schema) - state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) + state_table = schema.tables.get( + schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + ) # column names are pipeline properties self.pipeline_state_properties = list(state_table["columns"].keys()) self.config: WeaviateClientConfiguration = config @@ -668,7 +674,7 @@ def restore_file_load(self, file_path: str) -> LoadJob: def complete_load(self, load_id: str) -> None: values = [load_id, self.schema.name, 0, pendulum.now().isoformat()] assert len(values) == len(self.loads_collection_properties) - properties = {k:v for k,v in zip(self.loads_collection_properties, values)} + properties = {k: v for k, v in zip(self.loads_collection_properties, values)} self.create_object(properties, self.schema.loads_table_name) def __enter__(self) -> "WeaviateClient": @@ -684,9 +690,16 @@ def __exit__( def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) - values = [schema.stored_version_hash, schema.name, schema.version, schema.ENGINE_VERSION, str(pendulum.now().isoformat()), schema_str] + values = [ + schema.stored_version_hash, + schema.name, + schema.version, + schema.ENGINE_VERSION, + str(pendulum.now().isoformat()), + schema_str, + ] assert len(values) == len(self.version_collection_properties) - properties = {k:v for k,v in zip(self.version_collection_properties, values)} + properties = {k: v for k, v in zip(self.version_collection_properties, values)} self.create_object(properties, self.schema.version_table_name) def _from_db_type( diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 933ed758e3..d4cd04b946 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -76,7 +76,7 @@ def __init__(self, file_path: str, sql_client: SqlClientBase[Any]) -> None: sql_client.execute_many(self._split_fragments(sql)) # if we detect ddl transactions, only execute transaction if supported by client elif ( - not self._string_containts_ddl_queries(sql) + not self._string_contains_ddl_queries(sql) or sql_client.capabilities.supports_ddl_transactions ): # with sql_client.begin_transaction(): @@ -93,7 +93,7 @@ def exception(self) -> str: # this part of code should be never reached raise NotImplementedError() - def _string_containts_ddl_queries(self, sql: str) -> bool: + def _string_contains_ddl_queries(self, sql: str) -> bool: for cmd in DDL_COMMANDS: if re.search(cmd, sql, re.IGNORECASE): return True @@ -131,7 +131,6 @@ def state(self) -> TLoadJobState: class SqlJobClientBase(JobClientBase, WithStateSync): - def __init__( self, schema: Schema, @@ -139,13 +138,17 @@ def __init__( sql_client: SqlClientBase[TNativeConn], ) -> None: self.version_table_schema_columns = ", ".join( - sql_client.escape_column_name(col) for col in schema.get_table_columns(schema.version_table_name) + sql_client.escape_column_name(col) + for col in schema.get_table_columns(schema.version_table_name) ) self.loads_table_schema_columns = ", ".join( - sql_client.escape_column_name(col) for col in schema.get_table_columns(schema.loads_table_name) + sql_client.escape_column_name(col) + for col in schema.get_table_columns(schema.loads_table_name) ) 
# get definition of state table (may not be present in the schema) - state_table = schema.tables.get(schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table())) + state_table = schema.tables.get( + schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + ) self.state_table_columns = ", ".join( sql_client.escape_column_name(col) for col in state_table["columns"] ) @@ -265,8 +268,7 @@ def complete_load(self, load_id: str) -> None: name = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) now_ts = pendulum.now() self.sql_client.execute_sql( - f"INSERT INTO {name}({self.loads_table_schema_columns})" - " VALUES(%s, %s, %s, %s, %s);", + f"INSERT INTO {name}({self.loads_table_schema_columns}) VALUES(%s, %s, %s, %s, %s);", load_id, self.schema.name, 0, @@ -312,7 +314,6 @@ def _null_to_bool(v: str) -> bool: query += "table_catalog = %s AND " query += "table_schema = %s AND table_name = %s ORDER BY ordinal_position;" rows = self.sql_client.execute_sql(query, *db_params) - print(rows) # if no rows we assume that table does not exist schema_table: TTableSchemaColumns = {} if len(rows) == 0: @@ -320,7 +321,7 @@ def _null_to_bool(v: str) -> bool: return False, schema_table # TODO: pull more data to infer indexes, PK and uniques attributes/constraints for c in rows: - col_name = self.schema.naming.normalize_identifier(c[0]) + col_name = self.schema.naming.normalize_path(c[0]) numeric_precision = ( c[3] if self.capabilities.schema_supports_numeric_precision else None ) @@ -345,15 +346,19 @@ def get_stored_schema(self) -> StorageSchemaInfo: # c_schema_name = self.schema.naming.normalize_identifier("schema_name") # c_inserted_at = self.schema.naming.normalize_identifier("inserted_at") query = ( - f"SELECT {self.version_table_schema_columns} FROM {name} WHERE {c_schema_name} = %s ORDER" - f" BY {c_inserted_at} DESC;" + f"SELECT {self.version_table_schema_columns} FROM {name} WHERE {c_schema_name} = %s" + f" ORDER BY {c_inserted_at} DESC;" ) return self._row_to_schema_info(query, self.schema.name) def get_stored_state(self, pipeline_name: str) -> StateInfo: state_table = self.sql_client.make_qualified_table_name(self.schema.state_table_name) loads_table = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) - c_load_id, c_dlt_load_id, c_pipeline_name, c_status, c_created_at = self._norm_and_escape_columns("load_id", "_dlt_load_id", "pipeline_name", "status", "created_at") + c_load_id, c_dlt_load_id, c_pipeline_name, c_status, c_created_at = ( + self._norm_and_escape_columns( + "load_id", "_dlt_load_id", "pipeline_name", "status", "created_at" + ) + ) # c_load_id = self.schema.naming.normalize_identifier("load_id") # c_dlt_load_id = self.schema.naming.normalize_identifier("_dlt_load_id") # c_pipeline_name = self.schema.naming.normalize_identifier("pipeline_name") @@ -361,8 +366,8 @@ def get_stored_state(self, pipeline_name: str) -> StateInfo: # c_created_at = self.schema.naming.normalize_identifier("created_at") query = ( f"SELECT {self.state_table_columns} FROM {state_table} AS s JOIN {loads_table} AS l ON" - f" l.{c_load_id} = s.{c_dlt_load_id} WHERE {c_pipeline_name} = %s AND l.{c_status} = 0 ORDER BY" - f" {c_created_at} DESC" + f" l.{c_load_id} = s.{c_dlt_load_id} WHERE {c_pipeline_name} = %s AND l.{c_status} = 0" + f" ORDER BY {c_created_at} DESC" ) with self.sql_client.execute_query(query, pipeline_name) as cur: row = cur.fetchone() @@ -370,8 +375,10 @@ def get_stored_state(self, pipeline_name: str) -> 
StateInfo: return None return StateInfo(row[0], row[1], row[2], row[3], pendulum.instance(row[4])) - def _norm_and_escape_columns(self, *columns: str): - return map(self.sql_client.escape_column_name, map(self.schema.naming.normalize_identifier, columns)) + def _norm_and_escape_columns(self, *columns: str) -> Iterator[str]: + return map( + self.sql_client.escape_column_name, map(self.schema.naming.normalize_path, columns) + ) # def get_stored_states(self, state_table: str) -> List[StateInfo]: # """Loads list of compressed states from destination storage, optionally filtered by pipeline name""" @@ -384,8 +391,11 @@ def _norm_and_escape_columns(self, *columns: str): def get_stored_schema_by_hash(self, version_hash: str) -> StorageSchemaInfo: table_name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - c_version_hash, = self._norm_and_escape_columns("version_hash") - query = f"SELECT {self.version_table_schema_columns} FROM {table_name} WHERE {c_version_hash} = %s;" + (c_version_hash,) = self._norm_and_escape_columns("version_hash") + query = ( + f"SELECT {self.version_table_schema_columns} FROM {table_name} WHERE" + f" {c_version_hash} = %s;" + ) return self._row_to_schema_info(query, version_hash) def _execute_schema_update_sql(self, only_tables: Iterable[str]) -> TSchemaTables: @@ -470,7 +480,7 @@ def _get_table_update_sql( for hint in COLUMN_HINTS: if any(c.get(hint, False) is True for c in new_columns): hint_columns = [ - self.capabilities.escape_identifier(c["name"]) + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get(hint, False) ] @@ -533,7 +543,7 @@ def _replace_schema_in_storage(self, schema: Schema) -> None: Save the given schema in storage and remove all previous versions with the same name """ name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - c_schema_name, = self._norm_and_escape_columns("schema_name") + (c_schema_name,) = self._norm_and_escape_columns("schema_name") self.sql_client.execute_sql(f"DELETE FROM {name} WHERE {c_schema_name} = %s;", schema.name) self._update_schema_in_storage(schema) diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index 695f1a0972..7171f52b24 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -138,16 +138,20 @@ def execute_many( ret.append(result) return ret - @abstractmethod def fully_qualified_dataset_name(self, escape: bool = True) -> str: - pass + dataset_name = self.capabilities.case_identifier(self.dataset_name) + if escape: + return self.capabilities.escape_identifier(dataset_name) + return dataset_name def make_qualified_table_name(self, table_name: str, escape: bool = True) -> str: + table_name = self.capabilities.case_identifier(table_name) if escape: table_name = self.capabilities.escape_identifier(table_name) return f"{self.fully_qualified_dataset_name(escape=escape)}.{table_name}" def escape_column_name(self, column_name: str, escape: bool = True) -> str: + column_name = self.capabilities.case_identifier(column_name) if escape: return self.capabilities.escape_identifier(column_name) return column_name @@ -221,6 +225,11 @@ def _get_columns(self) -> List[str]: return [c[0] for c in self.native_cursor.description] def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: + """Fetches results as data frame in full or in specified chunks. + + May use native pandas/arrow reader if available. Depending on + the native implementation chunk size may vary. 
+ """ from dlt.common.libs.pandas import _wrap_result columns = self._get_columns() diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 15a9d6477d..9138341381 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -265,12 +265,19 @@ def root_key(self, value: bool) -> None: if value is True: RelationalNormalizer.update_normalizer_config( - self._schema, {"propagation": {"root": {data_normalizer.c_dlt_id: TColumnName(data_normalizer.c_dlt_root_id)}}} + self._schema, + { + "propagation": { + "root": { + data_normalizer.c_dlt_id: TColumnName(data_normalizer.c_dlt_root_id) + } + } + }, ) else: if self.root_key: propagation_config = config["propagation"] - propagation_config["root"].pop(data_normalizer.c_dlt_id) # type: ignore + propagation_config["root"].pop(data_normalizer.c_dlt_id) @property def resources(self) -> DltResourceDict: diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index 75d45e7fd1..5d08cd67f9 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -22,6 +22,7 @@ # allows to upgrade state when restored with a new version of state logic/schema STATE_ENGINE_VERSION = 4 + def json_encode_state(state: TPipelineState) -> str: return json.typed_dumps(state) @@ -82,7 +83,10 @@ def state_resource(state: TPipelineState) -> DltResource: "version_hash": state["_version_hash"], } return dlt.resource( - [state_doc], name=PIPELINE_STATE_TABLE_NAME, write_disposition="append", columns=pipeline_state_table()["columns"] + [state_doc], + name=PIPELINE_STATE_TABLE_NAME, + write_disposition="append", + columns=pipeline_state_table()["columns"], ) diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index cee9988ce2..15d77a7f02 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -2,7 +2,7 @@ from dlt.common.typing import StrAny, DictStrAny from dlt.common.normalizers.naming import NamingConvention -from dlt.common.schema.typing import TSimpleRegex +from dlt.common.schema.typing import TColumnName, TSimpleRegex from dlt.common.utils import digest128, uniq_id from dlt.common.schema import Schema from dlt.common.schema.utils import new_table @@ -29,7 +29,7 @@ def test_flatten_fix_field_name(norm: RelationalNormalizer) -> None: "f 2": [], "f!3": {"f4": "a", "f-5": "b", "f*6": {"c": 7, "c v": 8, "c x": []}}, } - flattened_row, lists = norm._flatten("mock_table", row, 0) # type: ignore[arg-type] + flattened_row, lists = norm._flatten("mock_table", row, 0) assert "f_1" in flattened_row # assert "f_2" in flattened_row assert "f_3__f4" in flattened_row @@ -62,12 +62,12 @@ def test_preserve_complex_value(norm: RelationalNormalizer) -> None: ) ) row_1 = {"value": 1} - flattened_row, _ = norm._flatten("with_complex", row_1, 0) # type: ignore[arg-type] - assert flattened_row["value"] == 1 # type: ignore[typeddict-item] + flattened_row, _ = norm._flatten("with_complex", row_1, 0) + assert flattened_row["value"] == 1 row_2 = {"value": {"complex": True}} - flattened_row, _ = norm._flatten("with_complex", row_2, 0) # type: ignore[arg-type] - assert flattened_row["value"] == row_2["value"] # type: ignore[typeddict-item] + flattened_row, _ = norm._flatten("with_complex", row_2, 0) + assert flattened_row["value"] == row_2["value"] # complex value is not flattened assert "value__complex" not in flattened_row @@ -78,12 +78,12 @@ def test_preserve_complex_value_with_hint(norm: RelationalNormalizer) -> None: 
norm.schema._compile_settings() row_1 = {"value": 1} - flattened_row, _ = norm._flatten("any_table", row_1, 0) # type: ignore[arg-type] - assert flattened_row["value"] == 1 # type: ignore[typeddict-item] + flattened_row, _ = norm._flatten("any_table", row_1, 0) + assert flattened_row["value"] == 1 row_2 = {"value": {"complex": True}} - flattened_row, _ = norm._flatten("any_table", row_2, 0) # type: ignore[arg-type] - assert flattened_row["value"] == row_2["value"] # type: ignore[typeddict-item] + flattened_row, _ = norm._flatten("any_table", row_2, 0) + assert flattened_row["value"] == row_2["value"] # complex value is not flattened assert "value__complex" not in flattened_row @@ -93,7 +93,7 @@ def test_child_table_linking(norm: RelationalNormalizer) -> None: # request _dlt_root_id propagation add_dlt_root_id_propagation(norm) - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # should have 7 entries (root + level 1 + 3 * list + 2 * object) assert len(rows) == 7 # root elem will not have a root hash if not explicitly added, "extend" is added only to child @@ -141,7 +141,7 @@ def test_child_table_linking_primary_key(norm: RelationalNormalizer) -> None: norm.schema.merge_hints({"primary_key": [TSimpleRegex("id")]}) norm.schema._compile_settings() - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) root = next(t for t in rows if t[0][0] == "table")[1] # record hash is random for primary keys, not based on their content # this is a change introduced in dlt 0.2.0a30 @@ -171,7 +171,7 @@ def test_yields_parents_first(norm: RelationalNormalizer) -> None: "f": [{"id": "level1", "l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}], "g": [{"id": "level2_g", "l": ["a"]}], } - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) tables = list(r[0][0] for r in rows) # child tables are always yielded before parent tables expected_tables = [ @@ -217,7 +217,7 @@ def test_yields_parent_relation(norm: RelationalNormalizer) -> None: } ], } - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # normalizer must return parent table first and move in order of the list elements when yielding child tables # the yielding order if fully defined expected_parents = [ @@ -275,10 +275,10 @@ def test_yields_parent_relation(norm: RelationalNormalizer) -> None: def test_list_position(norm: RelationalNormalizer) -> None: - row: StrAny = { + row: DictStrAny = { "f": [{"l": ["a", "b", "c"], "v": 120, "lo": [{"e": "a"}, {"e": "b"}, {"e": "c"}]}] } - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # root has no pos root = [t for t in rows if t[0][0] == "table"][0][1] assert "_dlt_list_idx" not in root @@ -289,13 +289,13 @@ def test_list_position(norm: RelationalNormalizer) -> None: # f_l must be ordered as it appears in the list for pos, elem in enumerate(["a", "b", "c"]): - row = next(t[1] for t in rows if t[0][0] == "table__f__l" and t[1]["value"] == elem) - assert row["_dlt_list_idx"] == pos + row_1 = next(t[1] for t in rows if t[0][0] == "table__f__l" and t[1]["value"] == elem) + assert row_1["_dlt_list_idx"] == pos # f_lo must be ordered - list of objects for pos, elem in enumerate(["a", "b", "c"]): - 
row = next(t[1] for t in rows if t[0][0] == "table__f__lo" and t[1]["e"] == elem) - assert row["_dlt_list_idx"] == pos + row_2 = next(t[1] for t in rows if t[0][0] == "table__f__lo" and t[1]["e"] == elem) + assert row_2["_dlt_list_idx"] == pos # def test_list_of_lists(norm: RelationalNormalizer) -> None: @@ -429,7 +429,7 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: "_dlt_id": row_id, "f": [{"l": ["a", "b", "c"], "v": 120, "lo": [{"e": "a"}, {"e": "b"}, {"e": "c"}]}], } - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) children = [t for t in rows if t[0][0] != "table"] # all hashes must be different distinct_hashes = set([ch[1]["_dlt_id"] for ch in children]) @@ -448,19 +448,19 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: assert f_lo_p2["_dlt_id"] == digest128(f"{el_f['_dlt_id']}_table__f__lo_2", DLT_ID_LENGTH_BYTES) # same data with same table and row_id - rows_2 = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows_2 = list(norm._normalize_row(row, {}, ("table",))) children_2 = [t for t in rows_2 if t[0][0] != "table"] # corresponding hashes must be identical assert all(ch[0][1]["_dlt_id"] == ch[1][1]["_dlt_id"] for ch in zip(children, children_2)) # change parent table and all child hashes must be different - rows_4 = list(norm._normalize_row(row, {}, ("other_table",))) # type: ignore[arg-type] + rows_4 = list(norm._normalize_row(row, {}, ("other_table",))) children_4 = [t for t in rows_4 if t[0][0] != "other_table"] assert all(ch[0][1]["_dlt_id"] != ch[1][1]["_dlt_id"] for ch in zip(children, children_4)) # change parent hash and all child hashes must be different row["_dlt_id"] = uniq_id() - rows_3 = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows_3 = list(norm._normalize_row(row, {}, ("table",))) children_3 = [t for t in rows_3 if t[0][0] != "table"] assert all(ch[0][1]["_dlt_id"] != ch[1][1]["_dlt_id"] for ch in zip(children, children_3)) @@ -468,14 +468,16 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: def test_keeps_dlt_id(norm: RelationalNormalizer) -> None: h = uniq_id() row = {"a": "b", "_dlt_id": h} - rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) root = [t for t in rows if t[0][0] == "table"][0][1] assert root["_dlt_id"] == h def test_propagate_hardcoded_context(norm: RelationalNormalizer) -> None: row = {"level": 1, "list": ["a", "b", "c"], "comp": [{"_timestamp": "a"}]} - rows = list(norm._normalize_row(row, {"_timestamp": 1238.9, "_dist_key": "SENDER_3000"}, ("table",))) # type: ignore[arg-type] + rows = list( + norm._normalize_row(row, {"_timestamp": 1238.9, "_dist_key": "SENDER_3000"}, ("table",)) + ) # context is not added to root element root = next(t for t in rows if t[0][0] == "table")[1] assert "_timestamp" in root @@ -505,7 +507,7 @@ def test_propagates_root_context(norm: RelationalNormalizer) -> None: "dependent_list": [1, 2, 3], "dependent_objects": [{"vx": "ax"}], } - normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # all non-root rows must have: non_root = [r for r in normalized_rows if r[0][1] is not None] assert all(r[1]["_dlt_root_id"] == "###" for r in non_root) @@ -521,12 +523,12 @@ def test_propagates_table_context( 
prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"][ "config" ]["propagation"] - prop_config["root"]["timestamp"] = "_partition_ts" # type: ignore[index] + prop_config["root"][TColumnName("timestamp")] = TColumnName("_partition_ts") # for table "table__lvl1" request to propagate "vx" and "partition_ovr" as "_partition_ts" (should overwrite root) - prop_config["tables"]["table__lvl1"] = { # type: ignore[index] - "vx": "__vx", - "partition_ovr": "_partition_ts", - "__not_found": "__not_found", + prop_config["tables"]["table__lvl1"] = { + TColumnName("vx"): TColumnName("__vx"), + TColumnName("partition_ovr"): TColumnName("_partition_ts"), + TColumnName("__not_found"): TColumnName("__not_found"), } if add_pk: @@ -544,7 +546,7 @@ def test_propagates_table_context( # to reproduce a bug where rows with _dlt_id set were not extended row["lvl1"][0]["_dlt_id"] = "row_id_lvl1" # type: ignore[index] - normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) non_root = [r for r in normalized_rows if r[0][1] is not None] # _dlt_root_id in all non root assert all(r[1]["_dlt_root_id"] == "###" for r in non_root) @@ -573,10 +575,10 @@ def test_propagates_table_context_to_lists(norm: RelationalNormalizer) -> None: prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"][ "config" ]["propagation"] - prop_config["root"]["timestamp"] = "_partition_ts" # type: ignore[index] + prop_config["root"][TColumnName("timestamp")] = TColumnName("_partition_ts") row = {"_dlt_id": "###", "timestamp": 12918291.1212, "lvl1": [1, 2, 3, [4, 5, 6]]} - normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # _partition_ts == timestamp on all child tables non_root = [r for r in normalized_rows if r[0][1] is not None] assert all(r[1]["_partition_ts"] == 12918291.1212 for r in non_root) @@ -589,7 +591,7 @@ def test_removes_normalized_list(norm: RelationalNormalizer) -> None: # after normalizing the list that got normalized into child table must be deleted row = {"comp": [{"_timestamp": "a"}]} # get iterator - normalized_rows_i = norm._normalize_row(row, {}, ("table",)) # type: ignore[arg-type] + normalized_rows_i = norm._normalize_row(row, {}, ("table",)) # yield just one item root_row = next(normalized_rows_i) # root_row = next(r for r in normalized_rows if r[0][1] is None) @@ -613,7 +615,7 @@ def test_preserves_complex_types_list(norm: RelationalNormalizer) -> None: ) ) row = {"value": ["from", {"complex": True}]} - normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # make sure only 1 row is emitted, the list is not normalized assert len(normalized_rows) == 1 # value is kept in root row -> market as complex @@ -622,7 +624,7 @@ def test_preserves_complex_types_list(norm: RelationalNormalizer) -> None: # same should work for a list row = {"value": ["from", ["complex", True]]} # type: ignore[list-item] - normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # make sure only 1 row is emitted, the list is not normalized assert len(normalized_rows) == 1 # value is kept in root row -> market as complex diff --git 
a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 1fecc0eeaa..a86de8ed76 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -66,7 +66,8 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: ) # check the dlt state table assert { - "version_hash" not in github_schema["tables"][PIPELINE_STATE_TABLE_NAME]["columns"] + "version_hash" + not in github_schema["tables"][PIPELINE_STATE_TABLE_NAME]["columns"] } # check loads table without attaching to pipeline duckdb_cfg = resolve_configuration( @@ -131,7 +132,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: # two schema versions rows = client.execute_sql(f"SELECT * FROM {VERSION_TABLE_NAME}") assert len(rows) == 2 - rows = client.execute_sql(f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME} ORDER BY version") + rows = client.execute_sql( + f"SELECT * FROM {PIPELINE_STATE_TABLE_NAME} ORDER BY version" + ) # we have hash columns assert len(rows[0]) == 6 + 2 assert len(rows) == 2 From 5984824d3f5381a9cde255322ee01c20acae070d Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:23:02 +0100 Subject: [PATCH 025/105] renames escape functions --- dlt/common/configuration/specs/base_configuration.py | 8 +++++--- dlt/common/data_writers/__init__.py | 4 ++-- dlt/common/data_writers/escape.py | 4 ++-- dlt/destinations/impl/athena/athena.py | 6 +++--- tests/common/data_writers/test_data_writers.py | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/dlt/common/configuration/specs/base_configuration.py b/dlt/common/configuration/specs/base_configuration.py index 84f59fa894..a00012c72f 100644 --- a/dlt/common/configuration/specs/base_configuration.py +++ b/dlt/common/configuration/specs/base_configuration.py @@ -197,11 +197,13 @@ def default_factory(att_value=att_value): # type: ignore[no-untyped-def] @configspec class BaseConfiguration(MutableMapping[str, Any]): - __is_resolved__: bool = dataclasses.field(default=False, init=False, repr=False) + __is_resolved__: bool = dataclasses.field(default=False, init=False, repr=False, compare=False) """True when all config fields were resolved and have a specified value type""" - __section__: str = dataclasses.field(default=None, init=False, repr=False) + __section__: str = dataclasses.field(default=None, init=False, repr=False, compare=False) """Obligatory section used by config providers when searching for keys, always present in the search path""" - __exception__: Exception = dataclasses.field(default=None, init=False, repr=False) + __exception__: Exception = dataclasses.field( + default=None, init=False, repr=False, compare=False + ) """Holds the exception that prevented the full resolution""" __config_gen_annotations__: ClassVar[List[str]] = [] """Additional annotations for config generator, currently holds a list of fields of interest that have defaults""" diff --git a/dlt/common/data_writers/__init__.py b/dlt/common/data_writers/__init__.py index 04c5d04328..ae5efe693c 100644 --- a/dlt/common/data_writers/__init__.py +++ b/dlt/common/data_writers/__init__.py @@ -3,7 +3,7 @@ from dlt.common.data_writers.escape import ( escape_redshift_literal, escape_redshift_identifier, - escape_bigquery_identifier, + escape_hive_identifier, ) __all__ = [ @@ -14,5 +14,5 @@ "new_file_id", "escape_redshift_literal", "escape_redshift_identifier", - "escape_bigquery_identifier", + "escape_hive_identifier", ] diff --git a/dlt/common/data_writers/escape.py 
b/dlt/common/data_writers/escape.py index 38ac304f59..36fab7a1f4 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -121,7 +121,7 @@ def escape_redshift_identifier(v: str) -> str: escape_athena_identifier = escape_postgres_identifier -def escape_bigquery_identifier(v: str) -> str: +def escape_hive_identifier(v: str) -> str: # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical return "`" + v.replace("\\", "\\\\").replace("`", "\\`") + "`" @@ -132,7 +132,7 @@ def escape_snowflake_identifier(v: str) -> str: return escape_postgres_identifier(v) -escape_databricks_identifier = escape_bigquery_identifier +escape_databricks_identifier = escape_hive_identifier DATABRICKS_ESCAPE_DICT = {"'": "\\'", "\\": "\\\\", "\n": "\\n", "\r": "\\r"} diff --git a/dlt/destinations/impl/athena/athena.py b/dlt/destinations/impl/athena/athena.py index efe8ac3ca4..0e7e950455 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -40,7 +40,7 @@ from dlt.common.destination.reference import LoadJob, FollowupJob from dlt.common.destination.reference import TLoadJobState, NewLoadJob, SupportsStagingDestination from dlt.common.storages import FileStorage -from dlt.common.data_writers.escape import escape_bigquery_identifier +from dlt.common.data_writers.escape import escape_hive_identifier from dlt.destinations.sql_jobs import SqlStagingCopyJob from dlt.destinations.typing import DBApi, DBTransaction @@ -204,9 +204,9 @@ def escape_ddl_identifier(self, v: str) -> str: # Athena uses HIVE to create tables but for querying it uses PRESTO (so normal escaping) if not v: return v - v = self.capabilities.case_identifier(v) + v = self.capabilities.casefold_identifier(v) # bigquery uses hive escaping - return escape_bigquery_identifier(v) + return escape_hive_identifier(v) def fully_qualified_ddl_dataset_name(self) -> str: return self.escape_ddl_identifier(self.dataset_name) diff --git a/tests/common/data_writers/test_data_writers.py b/tests/common/data_writers/test_data_writers.py index ac4f118229..4bde9dbfa9 100644 --- a/tests/common/data_writers/test_data_writers.py +++ b/tests/common/data_writers/test_data_writers.py @@ -10,7 +10,7 @@ from dlt.destinations.impl.redshift import capabilities as redshift_caps from dlt.common.data_writers.escape import ( escape_redshift_identifier, - escape_bigquery_identifier, + escape_hive_identifier, escape_redshift_literal, escape_postgres_literal, escape_duckdb_literal, @@ -146,7 +146,7 @@ def test_identifier_escape() -> None: def test_identifier_escape_bigquery() -> None: assert ( - escape_bigquery_identifier(", NULL'); DROP TABLE\"` -\\-") + escape_hive_identifier(", NULL'); DROP TABLE\"` -\\-") == "`, NULL'); DROP TABLE\"\\` -\\\\-`" ) From 345844132c0cc0f0101b94afda49e679e6857d40 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:26:13 +0100 Subject: [PATCH 026/105] destination capabilities for case fold and case sensitivity --- dlt/common/destination/capabilities.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index b3bc8e5109..7e12f39bb0 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -1,12 +1,10 @@ -from typing import Any, Callable, ClassVar, List, Literal, Optional, Tuple, Set, get_args +from typing import Any, Callable, ClassVar, List, Literal, Optional, Tuple, Set, Union, get_args from 
dlt.common.configuration.utils import serialize_value from dlt.common.configuration import configspec from dlt.common.configuration.specs import ContainerInjectableContext -from dlt.common.utils import identity - from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - +from dlt.common.normalizers.naming import NamingConvention from dlt.common.wei import EVM_DECIMAL_PRECISION # known loader file formats @@ -35,9 +33,13 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): preferred_staging_file_format: Optional[TLoaderFileFormat] supported_staging_file_formats: List[TLoaderFileFormat] escape_identifier: Callable[[str], str] + "Escapes table name, column name and other identifiers" escape_literal: Callable[[Any], Any] - case_identifier: Callable[[str], str] = identity - """Controls identifier casing on top of naming convention. Used to generate case insensitive casing.""" + "Escapes string literal" + casefold_identifier: Callable[[str], str] = str + """Casing function applied by destination to represent case insensitive identifiers.""" + has_case_sensitive_identifiers: bool + """Tells if identifiers in destination are case sensitive, before case_identifier function is applied""" decimal_precision: Tuple[int, int] wei_precision: Tuple[int, int] max_identifier_length: int @@ -48,7 +50,7 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): is_max_text_data_type_length_in_bytes: bool supports_transactions: bool supports_ddl_transactions: bool - naming_convention: str = "snake_case" + naming_convention: Union[str, NamingConvention] = "snake_case" alter_add_multi_column: bool = True supports_truncate_command: bool = True schema_supports_numeric_precision: bool = True @@ -65,15 +67,18 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): @staticmethod def generic_capabilities( preferred_loader_file_format: TLoaderFileFormat = None, + naming_convention: Union[str, NamingConvention] = None, ) -> "DestinationCapabilitiesContext": caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = preferred_loader_file_format caps.supported_loader_file_formats = ["jsonl", "insert_values", "parquet"] caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] - caps.escape_identifier = identity + caps.naming_convention = naming_convention or caps.naming_convention + caps.escape_identifier = str caps.escape_literal = serialize_value - caps.case_identifier = identity + caps.casefold_identifier = str + caps.has_case_sensitive_identifiers = True caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (EVM_DECIMAL_PRECISION, 0) caps.max_identifier_length = 65536 From 55362b0f28a3288c82ff41849330d6b8ee62d056 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:28:08 +0100 Subject: [PATCH 027/105] drops supports naming module and allows naming to be instance in config and schema --- dlt/common/normalizers/naming/__init__.py | 4 ++-- dlt/common/normalizers/naming/exceptions.py | 11 ++++++----- dlt/common/normalizers/naming/naming.py | 7 ------- tests/common/normalizers/snake_no_x.py | 10 ++++++++++ tests/common/normalizers/test_import_normalizers.py | 8 ++++---- 5 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 tests/common/normalizers/snake_no_x.py diff --git a/dlt/common/normalizers/naming/__init__.py b/dlt/common/normalizers/naming/__init__.py index 967fb9643e..2b3ecd74d0 100644 --- a/dlt/common/normalizers/naming/__init__.py 
+++ b/dlt/common/normalizers/naming/__init__.py @@ -1,3 +1,3 @@ -from .naming import SupportsNamingConvention, NamingConvention +from .naming import NamingConvention -__all__ = ["SupportsNamingConvention", "NamingConvention"] +__all__ = ["NamingConvention"] diff --git a/dlt/common/normalizers/naming/exceptions.py b/dlt/common/normalizers/naming/exceptions.py index 572fc7e0d0..d8448fa1e0 100644 --- a/dlt/common/normalizers/naming/exceptions.py +++ b/dlt/common/normalizers/naming/exceptions.py @@ -5,21 +5,22 @@ class NormalizersException(DltException): pass -class UnknownNamingModule(NormalizersException): +class UnknownNamingModule(ImportError, NormalizersException): def __init__(self, naming_module: str) -> None: self.naming_module = naming_module if "." in naming_module: msg = f"Naming module {naming_module} could not be found and imported" else: - msg = f"Naming module {naming_module} is not one of the standard dlt naming convention" + msg = f"Naming module {naming_module} is not one of the standard dlt naming conventions" super().__init__(msg) class InvalidNamingModule(NormalizersException): - def __init__(self, naming_module: str) -> None: + def __init__(self, naming_module: str, naming_class: str) -> None: self.naming_module = naming_module + self.naming_class = naming_class msg = ( - f"Naming module {naming_module} does not implement required SupportsNamingConvention" - " protocol" + f"In naming module '{naming_module}' the class '{naming_class}' is not a" + " NamingConvention" ) super().__init__(msg) diff --git a/dlt/common/normalizers/naming/naming.py b/dlt/common/normalizers/naming/naming.py index fccb147981..59728f2a14 100644 --- a/dlt/common/normalizers/naming/naming.py +++ b/dlt/common/normalizers/naming/naming.py @@ -100,10 +100,3 @@ def _trim_and_tag(identifier: str, tag: str, max_length: int) -> str: ) assert len(identifier) == max_length return identifier - - -class SupportsNamingConvention(Protocol): - """Expected of modules defining naming convention""" - - NamingConvention: Type[NamingConvention] - """A class with a name NamingConvention deriving from normalizers.naming.NamingConvention""" diff --git a/tests/common/normalizers/snake_no_x.py b/tests/common/normalizers/snake_no_x.py new file mode 100644 index 0000000000..af3a53cbce --- /dev/null +++ b/tests/common/normalizers/snake_no_x.py @@ -0,0 +1,10 @@ +from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention + + +class NamingConvention(SnakeCaseNamingConvention): + def normalize_identifier(self, identifier: str) -> str: + identifier = super().normalize_identifier(identifier) + if identifier.endswith("x"): + print(identifier[:-1] + "_") + return identifier[:-1] + "_" + return identifier diff --git a/tests/common/normalizers/test_import_normalizers.py b/tests/common/normalizers/test_import_normalizers.py index df6b973943..065aa4dd3e 100644 --- a/tests/common/normalizers/test_import_normalizers.py +++ b/tests/common/normalizers/test_import_normalizers.py @@ -4,10 +4,9 @@ from dlt.common.configuration.container import Container from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.normalizers import explicit_normalizers, import_normalizers +from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer -from dlt.common.normalizers.naming import snake_case -from dlt.common.normalizers.naming import direct +from dlt.common.normalizers.naming 
import snake_case, direct from dlt.common.normalizers.naming.exceptions import InvalidNamingModule, UnknownNamingModule from tests.common.normalizers.custom_normalizers import ( @@ -84,4 +83,5 @@ def test_import_invalid_naming_module() -> None: assert py_ex.value.naming_module == "dlt.common.tests" with pytest.raises(InvalidNamingModule) as py_ex2: import_normalizers(explicit_normalizers("dlt.pipeline")) - assert py_ex2.value.naming_module == "dlt.pipeline" + assert py_ex2.value.naming_module == "dlt" + assert py_ex2.value.naming_class == "pipeline" From b836dfe9991f56d0886e226b796488c4e872f6f9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:29:46 +0100 Subject: [PATCH 028/105] checks all tables in information schema in one go, observes case folding and sensitivity in sql destinations --- dlt/destinations/impl/athena/__init__.py | 3 +- dlt/destinations/impl/bigquery/__init__.py | 8 +- dlt/destinations/impl/bigquery/bigquery.py | 43 ++--- dlt/destinations/impl/bigquery/sql_client.py | 4 +- dlt/destinations/impl/databricks/__init__.py | 3 +- .../impl/databricks/databricks.py | 2 +- .../impl/databricks/sql_client.py | 4 +- dlt/destinations/impl/duckdb/__init__.py | 1 + dlt/destinations/impl/dummy/__init__.py | 1 + dlt/destinations/impl/motherduck/__init__.py | 1 + .../impl/motherduck/sql_client.py | 2 +- dlt/destinations/impl/mssql/sql_client.py | 4 +- dlt/destinations/impl/postgres/__init__.py | 3 +- dlt/destinations/impl/qdrant/__init__.py | 2 +- dlt/destinations/impl/qdrant/qdrant_client.py | 9 +- dlt/destinations/impl/redshift/__init__.py | 3 +- dlt/destinations/impl/snowflake/__init__.py | 3 +- dlt/destinations/impl/synapse/__init__.py | 1 + dlt/destinations/impl/weaviate/__init__.py | 2 +- .../impl/weaviate/weaviate_client.py | 15 +- dlt/destinations/job_client_impl.py | 161 ++++++++++++------ dlt/destinations/sql_client.py | 6 +- dlt/destinations/utils.py | 98 ++++++++++- tests/load/pipeline/test_drop.py | 18 +- tests/load/pipeline/test_merge_disposition.py | 2 +- tests/load/pipeline/test_pipelines.py | 18 +- tests/load/pipeline/test_restore_state.py | 5 +- tests/load/test_dummy_client.py | 6 +- tests/load/test_job_client.py | 56 +++--- tests/load/utils.py | 1 + tests/load/weaviate/test_weaviate_client.py | 18 +- tests/pipeline/test_pipeline.py | 32 +++- 32 files changed, 362 insertions(+), 173 deletions(-) diff --git a/dlt/destinations/impl/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py index 0968851918..178147cb9e 100644 --- a/dlt/destinations/impl/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -11,7 +11,8 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["parquet", "jsonl"] caps.escape_identifier = escape_athena_identifier - caps.case_identifier = str.lower + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py index 6d1491817a..2a6d7b9adb 100644 --- a/dlt/destinations/impl/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -1,4 +1,4 @@ -from dlt.common.data_writers.escape import escape_bigquery_identifier +from dlt.common.data_writers.escape import escape_hive_identifier from dlt.common.destination import 
DestinationCapabilitiesContext from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE @@ -9,8 +9,11 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["jsonl", "parquet"] caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["parquet", "jsonl"] - caps.escape_identifier = escape_bigquery_identifier + caps.escape_identifier = escape_hive_identifier caps.escape_literal = None + caps.has_case_sensitive_identifiers = ( + True # there are case insensitive identifiers but dlt does not use them + ) caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (76, 38) caps.max_identifier_length = 1024 @@ -21,5 +24,6 @@ def capabilities() -> DestinationCapabilitiesContext: caps.is_max_text_data_type_length_in_bytes = True caps.supports_ddl_transactions = False caps.supports_clone_table = True + caps.schema_supports_numeric_precision = False # no precision information in BigQuery return caps diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index 29a1854789..5c9f292267 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -43,6 +43,7 @@ from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_jobs import SqlMergeJob from dlt.destinations.type_mapping import TypeMapper +from dlt.destinations.utils import parse_db_data_type_str_with_precision class BigQueryTypeMapper(TypeMapper): @@ -50,10 +51,10 @@ class BigQueryTypeMapper(TypeMapper): "complex": "JSON", "text": "STRING", "double": "FLOAT64", - "bool": "BOOLEAN", + "bool": "BOOL", "date": "DATE", "timestamp": "TIMESTAMP", - "bigint": "INTEGER", + "bigint": "INT64", "binary": "BYTES", "wei": "BIGNUMERIC", # non-parametrized should hold wei values "time": "TIME", @@ -66,11 +67,11 @@ class BigQueryTypeMapper(TypeMapper): dbt_to_sct = { "STRING": "text", - "FLOAT": "double", - "BOOLEAN": "bool", + "FLOAT64": "double", + "BOOL": "bool", "DATE": "date", "TIMESTAMP": "timestamp", - "INTEGER": "bigint", + "INT64": "bigint", "BYTES": "binary", "NUMERIC": "decimal", "BIGNUMERIC": "decimal", @@ -89,9 +90,10 @@ def to_db_decimal_type(self, precision: Optional[int], scale: Optional[int]) -> def from_db_type( self, db_type: str, precision: Optional[int], scale: Optional[int] ) -> TColumnType: - if db_type == "BIGNUMERIC" and precision is None: + # precision is present in the type name + if db_type == "BIGNUMERIC": return dict(data_type="wei") - return super().from_db_type(db_type, precision, scale) + return super().from_db_type(*parse_db_data_type_str_with_precision(db_type)) class BigQueryLoadJob(LoadJob, FollowupJob): @@ -231,7 +233,7 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> reason = BigQuerySqlClient._get_reason_from_errors(gace) if reason == "notFound": # google.api_core.exceptions.NotFound: 404 – table not found - raise UnknownTableException(table["name"]) from gace + raise UnknownTableException(self.schema.name, table["name"]) from gace elif ( reason == "duplicate" ): # google.api_core.exceptions.Conflict: 409 PUT – already exists @@ -337,31 +339,6 @@ def _get_column_def_sql(self, column: TColumnSchema, table_format: TTableFormat column_def_sql += " OPTIONS (rounding_mode='ROUND_HALF_AWAY_FROM_ZERO')" return column_def_sql - def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - schema_table: TTableSchemaColumns = {} - try: - 
table = self.sql_client.native_connection.get_table( - self.sql_client.make_qualified_table_name(table_name, escape=False), - retry=self.sql_client._default_retry, - timeout=self.config.http_timeout, - ) - partition_field = table.time_partitioning.field if table.time_partitioning else None - for c in table.schema: - schema_c: TColumnSchema = { - "name": c.name, - "nullable": c.is_nullable, - "unique": False, - "sort": False, - "primary_key": False, - "foreign_key": False, - "cluster": c.name in (table.clustering_fields or []), - "partition": c.name == partition_field, - **self._from_db_type(c.field_type, c.precision, c.scale), - } - schema_table[c.name] = schema_c - return True, schema_table - except gcp_exceptions.NotFound: - return False, schema_table def _create_load_job(self, table: TTableSchema, file_path: str) -> bigquery.LoadJob: # append to table for merge loads (append to stage) and regular appends. diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 5fdbc12029..86e836aa44 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -234,8 +234,8 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB conn.close() def fully_qualified_dataset_name(self, escape: bool = True) -> str: - project_id = self.capabilities.case_identifier(self.credentials.project_id) - dataset_name = self.capabilities.case_identifier(self.dataset_name) + project_id = self.capabilities.casefold_identifier(self.credentials.project_id) + dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: project_id = self.capabilities.escape_identifier(project_id) dataset_name = self.capabilities.escape_identifier(dataset_name) diff --git a/dlt/destinations/impl/databricks/__init__.py b/dlt/destinations/impl/databricks/__init__.py index 81884fae4b..0784335196 100644 --- a/dlt/destinations/impl/databricks/__init__.py +++ b/dlt/destinations/impl/databricks/__init__.py @@ -2,8 +2,6 @@ from dlt.common.data_writers.escape import escape_databricks_identifier, escape_databricks_literal from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.databricks.configuration import DatabricksClientConfiguration - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -13,6 +11,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_databricks_identifier caps.escape_literal = escape_databricks_literal + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/databricks/databricks.py b/dlt/destinations/impl/databricks/databricks.py index 53b684eb18..425b181f9b 100644 --- a/dlt/destinations/impl/databricks/databricks.py +++ b/dlt/destinations/impl/databricks/databricks.py @@ -316,7 +316,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non def _get_storage_table_query_columns(self) -> List[str]: fields = super()._get_storage_table_query_columns() - fields[1] = ( # Override because this is the only way to get data type with precision + fields[2] = ( # Override because this is the only way to get data type with precision "full_data_type" ) return fields diff --git 
a/dlt/destinations/impl/databricks/sql_client.py b/dlt/destinations/impl/databricks/sql_client.py index 30e8e7c867..55acd10107 100644 --- a/dlt/destinations/impl/databricks/sql_client.py +++ b/dlt/destinations/impl/databricks/sql_client.py @@ -134,8 +134,8 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB yield DatabricksCursorImpl(curr) # type: ignore[abstract] def fully_qualified_dataset_name(self, escape: bool = True) -> str: - catalog = self.capabilities.case_identifier(self.credentials.catalog) - dataset_name = self.capabilities.case_identifier(self.dataset_name) + catalog = self.capabilities.casefold_identifier(self.credentials.catalog) + dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: catalog = self.capabilities.escape_identifier(catalog) dataset_name = self.capabilities.escape_identifier(dataset_name) diff --git a/dlt/destinations/impl/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py index 5cbc8dea53..d127523707 100644 --- a/dlt/destinations/impl/duckdb/__init__.py +++ b/dlt/destinations/impl/duckdb/__init__.py @@ -11,6 +11,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = [] caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_duckdb_literal + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 65536 diff --git a/dlt/destinations/impl/dummy/__init__.py b/dlt/destinations/impl/dummy/__init__.py index 37b2e77c8a..ca374b827d 100644 --- a/dlt/destinations/impl/dummy/__init__.py +++ b/dlt/destinations/impl/dummy/__init__.py @@ -28,6 +28,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = additional_formats + [config.loader_file_format] caps.preferred_staging_file_format = None caps.supported_staging_file_formats = additional_formats + [config.loader_file_format] + caps.has_case_sensitive_identifiers = True caps.max_identifier_length = 127 caps.max_column_identifier_length = 127 caps.max_query_length = 8 * 1024 * 1024 diff --git a/dlt/destinations/impl/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py index 74c0e36ef3..bfcb9cba14 100644 --- a/dlt/destinations/impl/motherduck/__init__.py +++ b/dlt/destinations/impl/motherduck/__init__.py @@ -9,6 +9,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["parquet", "insert_values", "jsonl"] caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_duckdb_literal + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 65536 diff --git a/dlt/destinations/impl/motherduck/sql_client.py b/dlt/destinations/impl/motherduck/sql_client.py index c6c86c33cc..677fa16098 100644 --- a/dlt/destinations/impl/motherduck/sql_client.py +++ b/dlt/destinations/impl/motherduck/sql_client.py @@ -31,7 +31,7 @@ def __init__(self, dataset_name: str, credentials: MotherDuckCredentials) -> Non def fully_qualified_dataset_name(self, escape: bool = True) -> str: dataset_name = super().fully_qualified_dataset_name(escape) - database_name = self.capabilities.case_identifier(self.database_name) + database_name = self.capabilities.casefold_identifier(self.database_name) if escape: database_name = 
self.capabilities.escape_identifier(database_name) return f"{database_name}.{dataset_name}" diff --git a/dlt/destinations/impl/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py index 459fdb7bc8..10eb24b45b 100644 --- a/dlt/destinations/impl/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -95,14 +95,14 @@ def drop_dataset(self) -> None: # Drop all views rows = self.execute_sql( "SELECT table_name FROM information_schema.views WHERE table_schema = %s;", - self.capabilities.case_identifier(self.dataset_name), + self.capabilities.casefold_identifier(self.dataset_name), ) view_names = [row[0] for row in rows] self._drop_views(*view_names) # Drop all tables rows = self.execute_sql( "SELECT table_name FROM information_schema.tables WHERE table_schema = %s;", - self.capabilities.case_identifier(self.dataset_name), + self.capabilities.casefold_identifier(self.dataset_name), ) table_names = [row[0] for row in rows] self.drop_tables(*table_names) diff --git a/dlt/destinations/impl/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py index 1dca5e9774..b3291d3ef2 100644 --- a/dlt/destinations/impl/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -1,6 +1,5 @@ from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION @@ -14,7 +13,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = [] caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_postgres_literal - caps.case_identifier = str.lower + caps.has_case_sensitive_identifiers = True caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (2 * EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) caps.max_identifier_length = 63 diff --git a/dlt/destinations/impl/qdrant/__init__.py b/dlt/destinations/impl/qdrant/__init__.py index 1a2c466b14..331d1725ef 100644 --- a/dlt/destinations/impl/qdrant/__init__.py +++ b/dlt/destinations/impl/qdrant/__init__.py @@ -6,7 +6,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "jsonl" caps.supported_loader_file_formats = ["jsonl"] - + caps.has_case_sensitive_identifiers = True caps.max_identifier_length = 200 caps.max_column_identifier_length = 1024 caps.max_query_length = 8 * 1024 * 1024 diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index 89176400ba..c5f76d711d 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -3,7 +3,11 @@ from dlt.common import json, pendulum, logger from dlt.common.schema import Schema, TTableSchema, TSchemaTables -from dlt.common.schema.utils import get_columns_names_with_prop, pipeline_state_table +from dlt.common.schema.utils import ( + get_columns_names_with_prop, + normalize_table_identifiers, + pipeline_state_table, +) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync from dlt.common.storages import FileStorage @@ -152,7 +156,8 @@ def __init__(self, schema: Schema, config: 
QdrantClientConfiguration) -> None: ) # get definition of state table (may not be present in the schema) state_table = schema.tables.get( - schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + schema.state_table_name, + normalize_table_identifiers(pipeline_state_table(), schema.naming), ) # column names are pipeline properties self.pipeline_state_properties = list(state_table["columns"].keys()) diff --git a/dlt/destinations/impl/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py index 74c1adad4d..5899fa2e12 100644 --- a/dlt/destinations/impl/redshift/__init__.py +++ b/dlt/destinations/impl/redshift/__init__.py @@ -11,7 +11,8 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_redshift_identifier caps.escape_literal = escape_redshift_literal - caps.case_identifier = str.lower + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 127 diff --git a/dlt/destinations/impl/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py index 2bc6cec485..4de37762a6 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -10,7 +10,8 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "jsonl" caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_snowflake_identifier - caps.case_identifier = str.upper + caps.casefold_identifier = str.upper + caps.has_case_sensitive_identifiers = True caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py index 53dbabc090..b31330c473 100644 --- a/dlt/destinations/impl/synapse/__init__.py +++ b/dlt/destinations/impl/synapse/__init__.py @@ -18,6 +18,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_mssql_literal + caps.has_case_sensitive_identifiers = False # Synapse has a max precision of 38 # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#DataTypes diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py index 143e0260d2..6ffea5ae76 100644 --- a/dlt/destinations/impl/weaviate/__init__.py +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -6,7 +6,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "jsonl" caps.supported_loader_file_formats = ["jsonl"] - + caps.has_case_sensitive_identifiers = False caps.max_identifier_length = 200 caps.max_column_identifier_length = 1024 caps.max_query_length = 8 * 1024 * 1024 diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 137d765604..fe1c0d2162 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -29,7 +29,11 @@ from dlt.common.time import ensure_pendulum_datetime from dlt.common.schema import Schema, TTableSchema, TSchemaTables, TTableSchemaColumns 
from dlt.common.schema.typing import TColumnSchema, TColumnType -from dlt.common.schema.utils import get_columns_names_with_prop, pipeline_state_table +from dlt.common.schema.utils import ( + get_columns_names_with_prop, + normalize_table_identifiers, + pipeline_state_table, +) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync from dlt.common.data_types import TDataType @@ -243,7 +247,8 @@ def __init__(self, schema: Schema, config: WeaviateClientConfiguration) -> None: ) # get definition of state table (may not be present in the schema) state_table = schema.tables.get( - schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + schema.state_table_name, + normalize_table_identifiers(pipeline_state_table(), schema.naming), ) # column names are pipeline properties self.pipeline_state_properties = list(state_table["columns"].keys()) @@ -453,7 +458,11 @@ def _execute_schema_update(self, only_tables: Iterable[str]) -> None: for table_name in only_tables or self.schema.tables: exists, existing_columns = self.get_storage_table(table_name) # TODO: detect columns where vectorization was added or removed and modify it. currently we ignore change of hints - new_columns = self.schema.get_new_table_columns(table_name, existing_columns) + new_columns = self.schema.get_new_table_columns( + table_name, + existing_columns, + case_sensitive=self.capabilities.has_case_sensitive_identifiers, + ) logger.info(f"Found {len(new_columns)} updates for {table_name} in {self.schema.name}") if len(new_columns) > 0: if exists: diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index d4cd04b946..366ba1ce77 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -1,24 +1,18 @@ import os from abc import abstractmethod import base64 -import binascii import contextlib from copy import copy -import datetime # noqa: 251 from types import TracebackType from typing import ( Any, - ClassVar, List, - NamedTuple, Optional, Sequence, Tuple, Type, Iterable, Iterator, - ContextManager, - cast, ) import zlib import re @@ -31,7 +25,7 @@ TTableSchema, TTableFormat, ) -from dlt.common.schema.utils import pipeline_state_table +from dlt.common.schema.utils import normalize_table_identifiers, pipeline_state_table from dlt.common.storages import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns, TSchemaTables from dlt.common.destination.reference import ( @@ -57,6 +51,7 @@ from dlt.destinations.typing import TNativeConn from dlt.destinations.sql_client import SqlClientBase +from dlt.destinations.utils import info_schema_null_to_bool, verify_sql_job_client_schema # this should suffice for now DDL_COMMANDS = ["ALTER", "CREATE", "DROP"] @@ -147,7 +142,8 @@ def __init__( ) # get definition of state table (may not be present in the schema) state_table = schema.tables.get( - schema.state_table_name, schema.normalize_table_identifiers(pipeline_state_table()) + schema.state_table_name, + normalize_table_identifiers(pipeline_state_table(), schema.naming), ) self.state_table_columns = ", ".join( sql_client.escape_column_name(col) for col in state_table["columns"] @@ -285,54 +281,107 @@ def __exit__( ) -> None: self.sql_client.close_connection() - def _get_storage_table_query_columns(self) -> List[str]: - """Column names used when querying table from information schema. 
- Override for databases that use different namings. - """ - fields = ["column_name", "data_type", "is_nullable"] - if self.capabilities.schema_supports_numeric_precision: - fields += ["numeric_precision", "numeric_scale"] - return fields + def get_storage_tables( + self, table_names: Iterable[str] + ) -> Iterable[Tuple[str, TTableSchemaColumns]]: + """Uses INFORMATION SCHEMA to retrieve table and column information for tables in `table_names` iterator. + Table names should be normalized according to the naming convention and will be further converted to the desired casing + in order to (in most cases) create a case-insensitive name suitable for search in the information schema. - def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - def _null_to_bool(v: str) -> bool: - if v == "NO": - return False - elif v == "YES": - return True - raise ValueError(v) - - fields = self._get_storage_table_query_columns() - db_params = self.sql_client.make_qualified_table_name(table_name, escape=False).split( - ".", 3 + The column names are returned as in the information schema. To match those with columns in an existing table, you'll need to use + the `schema.get_new_table_columns` method and pass the correct casing. Most of the casing functions are irreversible, so it is not + possible to convert identifiers from the INFORMATION SCHEMA back into the case sensitive dlt schema. + """ + table_names = list(table_names) + if len(table_names) == 0: + # empty generator + return + # create table name conversion lookup table + name_lookup = { + self.capabilities.casefold_identifier(table_name): table_name + for table_name in table_names + } + # this should never happen: we verify schema for name clashes before loading + assert len(name_lookup) == len(table_names), ( + f"One or more of tables in {table_names} after applying" + f" {self.capabilities.casefold_identifier} produced a clashing name." ) + # get components from full table name + db_params = self.sql_client.fully_qualified_dataset_name(escape=False).split(".", 2) + has_catalog = len(db_params) == 2 + # use cased identifier i.e. 
always lower on redshift and upper (by default) on snowflake + db_params = db_params + list(name_lookup.keys()) + query = f""" -SELECT {",".join(fields)} +SELECT {",".join(self._get_storage_table_query_columns())} FROM INFORMATION_SCHEMA.COLUMNS WHERE """ - if len(db_params) == 3: + if has_catalog: query += "table_catalog = %s AND " - query += "table_schema = %s AND table_name = %s ORDER BY ordinal_position;" + # placeholder for each table + table_placeholders = ",".join(["%s"] * len(table_names)) + query += ( + f"table_schema = %s AND table_name IN ({table_placeholders}) ORDER BY table_name," + " ordinal_position;" + ) + print(query) + print(db_params) rows = self.sql_client.execute_sql(query, *db_params) - # if no rows we assume that table does not exist - schema_table: TTableSchemaColumns = {} - if len(rows) == 0: - # TODO: additionally check if table exists - return False, schema_table - # TODO: pull more data to infer indexes, PK and uniques attributes/constraints + print(rows) + prev_table: str = None + storage_columns: TTableSchemaColumns = None for c in rows: - col_name = self.schema.naming.normalize_path(c[0]) + # make sure that new table is known + assert ( + c[0] in name_lookup + ), f"Table name {c[0]} not in expected tables {name_lookup.keys()}" + table_name = name_lookup[c[0]] + if prev_table != table_name: + # yield what we have + if storage_columns: + yield (prev_table, storage_columns) + # we have new table + storage_columns = {} + prev_table = table_name + # remove from table_names + table_names.remove(prev_table) + # add columns + # TODO: in many cases this will not work + col_name = c[1] numeric_precision = ( - c[3] if self.capabilities.schema_supports_numeric_precision else None + c[4] if self.capabilities.schema_supports_numeric_precision else None ) - numeric_scale = c[4] if self.capabilities.schema_supports_numeric_precision else None + numeric_scale = c[5] if self.capabilities.schema_supports_numeric_precision else None + schema_c: TColumnSchemaBase = { "name": col_name, - "nullable": _null_to_bool(c[2]), - **self._from_db_type(c[1], numeric_precision, numeric_scale), + "nullable": info_schema_null_to_bool(c[3]), + **self._from_db_type(c[2], numeric_precision, numeric_scale), } - schema_table[col_name] = schema_c # type: ignore - return True, schema_table + storage_columns[col_name] = schema_c # type: ignore + # yield last table, it must have at least one column or we had no rows + if storage_columns: + yield (prev_table, storage_columns) + # if no columns we assume that table does not exist + for table_name in table_names: + yield (table_name, {}) + + def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: + """Uses get_storage_tables to get single `table_name` schema. + + Returns (True, ...) if table exists and (False, {}) when not + """ + storage_table = list(self.get_storage_tables([table_name]))[0] + return len(storage_table[1]) > 0, storage_table[1] + + def _get_storage_table_query_columns(self) -> List[str]: + """Column names used when querying table from information schema. + Override for databases that use different namings. 
+ """ + fields = ["table_name", "column_name", "data_type", "is_nullable"] + if self.capabilities.schema_supports_numeric_precision: + fields += ["numeric_precision", "numeric_scale"] + return fields @abstractmethod def _from_db_type( @@ -424,12 +473,15 @@ def _build_schema_update_sql( """ sql_updates = [] schema_update: TSchemaTables = {} - for table_name in only_tables or self.schema.tables: - exists, storage_table = self.get_storage_table(table_name) - new_columns = self._create_table_update(table_name, storage_table) + for table_name, storage_columns in self.get_storage_tables( + only_tables or self.schema.tables.keys() + ): + new_columns = self._create_table_update(table_name, storage_columns) if len(new_columns) > 0: # build and add sql to execute - sql_statements = self._get_table_update_sql(table_name, new_columns, exists) + sql_statements = self._get_table_update_sql( + table_name, new_columns, len(storage_columns) > 0 + ) for sql in sql_statements: if not sql.endswith(";"): sql += ";" @@ -509,8 +561,12 @@ def _gen_not_null(v: bool) -> str: def _create_table_update( self, table_name: str, storage_columns: TTableSchemaColumns ) -> Sequence[TColumnSchema]: - # compare table with stored schema and produce delta - updates = self.schema.get_new_table_columns(table_name, storage_columns) + """Compares storage columns with schema table and produce delta columns difference""" + updates = self.schema.get_new_table_columns( + table_name, + storage_columns, + case_sensitive=self.capabilities.has_case_sensitive_identifiers, + ) logger.info(f"Found {len(updates)} updates for {table_name} in {self.schema.name}") return updates @@ -576,6 +632,13 @@ def _commit_schema_update(self, schema: Schema, schema_str: str) -> None: schema_str, ) + def _verify_schema(self) -> None: + super()._verify_schema() + if exceptions := verify_sql_job_client_schema(self.schema, warnings=True): + for exception in exceptions: + logger.error(str(exception)) + raise exceptions[0] + class SqlJobClientWithStaging(SqlJobClientBase, WithStagingDataset): in_staging_mode: bool = False diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index 7171f52b24..f642e69217 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -139,19 +139,19 @@ def execute_many( return ret def fully_qualified_dataset_name(self, escape: bool = True) -> str: - dataset_name = self.capabilities.case_identifier(self.dataset_name) + dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: return self.capabilities.escape_identifier(dataset_name) return dataset_name def make_qualified_table_name(self, table_name: str, escape: bool = True) -> str: - table_name = self.capabilities.case_identifier(table_name) + table_name = self.capabilities.casefold_identifier(table_name) if escape: table_name = self.capabilities.escape_identifier(table_name) return f"{self.fully_qualified_dataset_name(escape=escape)}.{table_name}" def escape_column_name(self, column_name: str, escape: bool = True) -> str: - column_name = self.capabilities.case_identifier(column_name) + column_name = self.capabilities.casefold_identifier(column_name) if escape: return self.capabilities.escape_identifier(column_name) return column_name diff --git a/dlt/destinations/utils.py b/dlt/destinations/utils.py index d4b945a840..e9c3289f54 100644 --- a/dlt/destinations/utils.py +++ b/dlt/destinations/utils.py @@ -1,7 +1,18 @@ -from typing import Any +import re +from typing import Any, List, Optional, Tuple +from dlt.common 
import logger +from dlt.common.schema import Schema +from dlt.common.schema.exceptions import SchemaCorruptedException +from dlt.common.schema.utils import ( + get_columns_names_with_prop, + get_first_column_name_with_prop, + has_column_with_prop, +) from dlt.extract import DltResource, resource as make_resource +RE_DATA_TYPE = re.compile(r"([A-Z]+)\((\d+)(?:,\s?(\d+))?\)") + def ensure_resource(data: Any) -> DltResource: """Wraps `data` in a DltResource if it's not a DltResource already.""" @@ -14,3 +25,88 @@ def ensure_resource(data: Any) -> DltResource: else: resource = data return resource + + +def info_schema_null_to_bool(v: str) -> bool: + """Converts INFORMATION SCHEMA truth values to Python bool""" + if v == "NO": + return False + elif v == "YES": + return True + raise ValueError(v) + + +def parse_db_data_type_str_with_precision(db_type: str) -> Tuple[str, Optional[int], Optional[int]]: + """Parses a db data type with optional precision or precision and scale information""" + # Search for matches using the regular expression + match = RE_DATA_TYPE.match(db_type) + + # If the pattern matches, extract the type, precision, and scale + if match: + db_type = match.group(1) + precision = int(match.group(2)) + scale = int(match.group(3)) if match.group(3) else None + return db_type, precision, scale + + # If the pattern does not match, return the original type without precision and scale + return db_type, None, None + + +def verify_sql_job_client_schema(schema: Schema, warnings: bool = True) -> List[Exception]: + log = logger.warning if warnings else logger.info + # collect all exceptions to show all problems in the schema + exception_log: List[Exception] = [] + + # verifies schema settings specific to sql job client + for table in schema.data_tables(): + table_name = table["name"] + if has_column_with_prop(table, "hard_delete"): + if len(get_columns_names_with_prop(table, "hard_delete")) > 1: + exception_log.append( + SchemaCorruptedException( + schema.name, + f'Found multiple "hard_delete" column hints for table "{table_name}" in' + f' schema "{schema.name}" while only one is allowed:' + f' {", ".join(get_columns_names_with_prop(table, "hard_delete"))}.', + ) + ) + if table.get("write_disposition") in ("replace", "append"): + log( + f"""The "hard_delete" column hint for column "{get_first_column_name_with_prop(table, 'hard_delete')}" """ + f'in table "{table_name}" with write disposition' + f' "{table.get("write_disposition")}"' + f' in schema "{schema.name}" will be ignored.' + ' The "hard_delete" column hint is only applied when using' + ' the "merge" write disposition.' + ) + if has_column_with_prop(table, "dedup_sort"): + if len(get_columns_names_with_prop(table, "dedup_sort")) > 1: + exception_log.append( + SchemaCorruptedException( + schema.name, + f'Found multiple "dedup_sort" column hints for table "{table_name}" in' + f' schema "{schema.name}" while only one is allowed:' + f' {", ".join(get_columns_names_with_prop(table, "dedup_sort"))}.', + ) + ) + if table.get("write_disposition") in ("replace", "append"): + log( + f"""The "dedup_sort" column hint for column "{get_first_column_name_with_prop(table, 'dedup_sort')}" """ + f'in table "{table_name}" with write disposition' + f' "{table.get("write_disposition")}"' + f' in schema "{schema.name}" will be ignored.' + ' The "dedup_sort" column hint is only applied when using' + ' the "merge" write disposition.' 
+ ) + if table.get("write_disposition") == "merge" and not has_column_with_prop( + table, "primary_key" + ): + log( + f"""The "dedup_sort" column hint for column "{get_first_column_name_with_prop(table, 'dedup_sort')}" """ + f'in table "{table_name}" with write disposition' + f' "{table.get("write_disposition")}"' + f' in schema "{schema.name}" will be ignored.' + ' The "dedup_sort" column hint is only applied when a' + " primary key has been specified." + ) + return exception_log diff --git a/tests/load/pipeline/test_drop.py b/tests/load/pipeline/test_drop.py index cd18454d7c..d4cf79bac9 100644 --- a/tests/load/pipeline/test_drop.py +++ b/tests/load/pipeline/test_drop.py @@ -84,13 +84,14 @@ def assert_dropped_resource_tables(pipeline: Pipeline, resources: List[str]) -> client: SqlJobClientBase with pipeline.destination_client(pipeline.default_schema_name) as client: # type: ignore[assignment] # Check all tables supposed to be dropped are not in dataset - for table in dropped_tables: - exists, _ = client.get_storage_table(table) - assert not exists + storage_tables = list(client.get_storage_tables(dropped_tables)) + # no columns in all tables + assert all(len(table[1]) == 0 for table in storage_tables) + # Check tables not from dropped resources still exist - for table in expected_tables: - exists, _ = client.get_storage_table(table) - assert exists + storage_tables = list(client.get_storage_tables(expected_tables)) + # all tables have columns + assert all(len(table[1]) > 0 for table in storage_tables) def assert_dropped_resource_states(pipeline: Pipeline, resources: List[str]) -> None: @@ -293,9 +294,8 @@ def test_drop_all_flag(destination_config: DestinationTestConfiguration) -> None # Verify original _dlt tables were not deleted with attached._sql_job_client(attached.default_schema) as client: - for tbl in dlt_tables: - exists, _ = client.get_storage_table(tbl) - assert exists + storage_tables = list(client.get_storage_tables(dlt_tables)) + assert all(len(table[1]) > 0 for table in storage_tables) @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index 19ee9a34c8..270b062b58 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -37,7 +37,7 @@ def test_merge_on_keys_in_schema(destination_config: DestinationTestConfiguratio # make block uncles unseen to trigger filtering loader in loader for child tables if has_table_seen_data(schema.tables["blocks__uncles"]): - del schema.tables["blocks__uncles"]["x-normalizer"] # type: ignore[typeddict-item] + del schema.tables["blocks__uncles"]["x-normalizer"] assert not has_table_seen_data(schema.tables["blocks__uncles"]) with open( diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index a93599831d..03afa083c5 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -926,8 +926,7 @@ def table_3(make_data=False): load_table_counts(pipeline, "table_3") assert "x-normalizer" not in pipeline.default_schema.tables["table_3"] assert ( - pipeline.default_schema.tables["_dlt_pipeline_state"]["x-normalizer"]["seen-data"] # type: ignore[typeddict-item] - is True + pipeline.default_schema.tables["_dlt_pipeline_state"]["x-normalizer"]["seen-data"] is True ) # load with one empty job, table 3 not created @@ -969,18 +968,9 @@ def table_3(make_data=False): # print(v5) # check if seen data is market correctly - assert ( - 
pipeline.default_schema.tables["table_3"]["x-normalizer"]["seen-data"] # type: ignore[typeddict-item] - is True - ) - assert ( - pipeline.default_schema.tables["table_2"]["x-normalizer"]["seen-data"] # type: ignore[typeddict-item] - is True - ) - assert ( - pipeline.default_schema.tables["table_1"]["x-normalizer"]["seen-data"] # type: ignore[typeddict-item] - is True - ) + assert pipeline.default_schema.tables["table_3"]["x-normalizer"]["seen-data"] is True + assert pipeline.default_schema.tables["table_2"]["x-normalizer"]["seen-data"] is True + assert pipeline.default_schema.tables["table_1"]["x-normalizer"]["seen-data"] is True # @pytest.mark.skip(reason="Finalize the test: compare some_data values to values from database") diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 6d323c94c5..63c9d89323 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -7,6 +7,7 @@ import dlt from dlt.common import pendulum from dlt.common.schema.schema import Schema, utils +from dlt.common.schema.utils import normalize_table_identifiers from dlt.common.utils import uniq_id from dlt.common.exceptions import DestinationUndefinedEntity @@ -75,7 +76,9 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - **utils.pipeline_state_table()["columns"], } ) - schema.update_table(schema.normalize_table_identifiers(resource.compute_table_schema())) + schema.update_table( + normalize_table_identifiers(resource.compute_table_schema(), schema.naming) + ) # do not bump version here or in sync_schema, dlt won't recognize that schema changed and it won't update it in storage # so dlt in normalize stage infers _state_version table again but with different column order and the column order in schema is different # then in database. parquet is created in schema order and in Redshift it must exactly match the order. 
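A minimal sketch of how the `get_storage_tables` generator added to `job_client_impl.py` above is meant to be consumed; the `client` object and the table names are placeholders for illustration, not part of the patch. An empty column mapping signals that the table does not exist in the destination, which is exactly what the tests below assert.

# hypothetical caller; `client` is assumed to be an instance of a concrete SqlJobClientBase subclass
for table_name, storage_columns in client.get_storage_tables(["event_slot", "missing_table"]):
    if not storage_columns:
        # an empty column mapping means the table does not exist in the destination dataset
        print(f"{table_name} does not exist")
    else:
        # column names come back as stored in INFORMATION SCHEMA (already casefolded)
        print(f"{table_name}: {list(storage_columns)}")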
diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index d7884abcf0..0cceab8081 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -485,9 +485,7 @@ def test_extend_table_chain() -> None: # no jobs for bot assert _extend_tables_with_table_chain(schema, ["event_bot"], ["event_user"]) == set() # skip unseen tables - del schema.tables["event_user__parse_data__entities"][ # type:ignore[typeddict-item] - "x-normalizer" - ] + del schema.tables["event_user__parse_data__entities"]["x-normalizer"] entities_chain = { name for name in schema.data_table_names() @@ -607,7 +605,7 @@ def test_get_completed_table_chain_cases() -> None: deep_child = schema.tables[ "event_user__parse_data__response_selector__default__response__response_templates" ] - del deep_child["x-normalizer"] # type:ignore[typeddict-item] + del deep_child["x-normalizer"] chain = get_completed_table_chain( schema, [event_user_job], event_user, event_user_job.job_file_info.job_id() ) diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 91f177b6f6..3628a46655 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -74,13 +74,18 @@ def test_initialize_storage(client: SqlJobClientBase) -> None: ) def test_get_schema_on_empty_storage(client: SqlJobClientBase) -> None: # test getting schema on empty dataset without any tables - exists, _ = client.get_storage_table(VERSION_TABLE_NAME) - assert exists is False + table_name, table_columns = list(client.get_storage_tables([VERSION_TABLE_NAME]))[0] + assert table_name == VERSION_TABLE_NAME + assert len(table_columns) == 0 schema_info = client.get_stored_schema() assert schema_info is None schema_info = client.get_stored_schema_by_hash("8a0298298823928939") assert schema_info is None + # now try to get several non existing tables + storage_tables = list(client.get_storage_tables(["no_table_1", "no_table_2"])) + assert [("no_table_1", {}), ("no_table_2", {})] == storage_tables + @pytest.mark.order(3) @pytest.mark.parametrize( @@ -88,6 +93,8 @@ def test_get_schema_on_empty_storage(client: SqlJobClientBase) -> None: ) def test_get_update_basic_schema(client: SqlJobClientBase) -> None: schema = client.schema + print(schema.stored_version) + print(schema.version) schema_update = client.update_stored_schema() # expect dlt tables in schema update assert set(schema_update.keys()) == {VERSION_TABLE_NAME, LOADS_TABLE_NAME, "event_slot"} @@ -95,17 +102,17 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None: # check is event slot has variant assert schema_update["event_slot"]["columns"]["value"]["variant"] is True # now we have dlt tables - exists, _ = client.get_storage_table(VERSION_TABLE_NAME) - assert exists is True - exists, _ = client.get_storage_table(LOADS_TABLE_NAME) - assert exists is True + storage_tables = list(client.get_storage_tables([VERSION_TABLE_NAME, LOADS_TABLE_NAME])) + assert set([table[0] for table in storage_tables]) == {VERSION_TABLE_NAME, LOADS_TABLE_NAME} + assert [len(table[1]) > 0 for table in storage_tables] == [True, True] # verify if schemas stored this_schema = client.get_stored_schema_by_hash(schema.version_hash) newest_schema = client.get_stored_schema() # should point to the same schema assert this_schema == newest_schema # check fields - assert this_schema.version == 1 == schema.version + # NOTE: schema version == 2 because we updated default hints after loading the schema + assert this_schema.version == 2 == schema.version assert 
this_schema.version_hash == schema.stored_version_hash assert this_schema.engine_version == schema.ENGINE_VERSION assert this_schema.schema_name == schema.name @@ -124,7 +131,7 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None: this_schema = client.get_stored_schema_by_hash(schema.version_hash) newest_schema = client.get_stored_schema() assert this_schema == newest_schema - assert this_schema.version == schema.version == 2 + assert this_schema.version == schema.version == 3 assert this_schema.version_hash == schema.stored_version_hash # simulate parallel write: initial schema is modified differently and written alongside the first one @@ -132,14 +139,14 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None: first_schema = Schema.from_dict(json.loads(first_version_schema)) first_schema.tables["event_bot"]["write_disposition"] = "replace" first_schema.bump_version() - assert first_schema.version == this_schema.version == 2 + assert first_schema.version == this_schema.version == 3 # wait to make load_newest_schema deterministic sleep(0.1) client._update_schema_in_storage(first_schema) this_schema = client.get_stored_schema_by_hash(first_schema.version_hash) newest_schema = client.get_stored_schema() assert this_schema == newest_schema # error - assert this_schema.version == first_schema.version == 2 + assert this_schema.version == first_schema.version == 3 assert this_schema.version_hash == first_schema.stored_version_hash # get schema with non existing hash @@ -218,8 +225,8 @@ def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None: assert table_update["timestamp"]["sort"] is True assert table_update["sender_id"]["cluster"] is True assert table_update["_dlt_id"]["unique"] is True - exists, _ = client.get_storage_table(table_name) - assert exists is True + _, storage_columns = list(client.get_storage_tables([table_name]))[0] + assert len(storage_columns) > 0 @pytest.mark.parametrize( @@ -244,14 +251,12 @@ def test_schema_update_create_table_bigquery(client: SqlJobClientBase) -> None: table_update = schema_update["event_test_table"]["columns"] assert table_update["timestamp"]["partition"] is True assert table_update["_dlt_id"]["nullable"] is False - exists, storage_table = client.get_storage_table("event_test_table") - assert exists is True - assert storage_table["timestamp"]["partition"] is True - assert storage_table["sender_id"]["cluster"] is True - exists, storage_table = client.get_storage_table("_dlt_version") - assert exists is True - assert storage_table["version"]["partition"] is False - assert storage_table["version"]["cluster"] is False + _, storage_columns = client.get_storage_table("event_test_table") + assert storage_columns["timestamp"]["partition"] is True + assert storage_columns["sender_id"]["cluster"] is True + _, storage_columns = client.get_storage_table("_dlt_version") + assert storage_columns["version"]["partition"] is False + assert storage_columns["version"]["cluster"] is False @pytest.mark.parametrize( @@ -342,9 +347,7 @@ def test_drop_tables(client: SqlJobClientBase) -> None: client.drop_tables(*tables_to_drop, replace_schema=False) # Verify requested tables are dropped - for tbl in tables_to_drop: - exists, _ = client.get_storage_table(tbl) - assert not exists + assert all(len(table[1]) == 0 for table in client.get_storage_tables(tables_to_drop)) # Verify _dlt_version schema is updated and old versions deleted table_name = client.sql_client.make_qualified_table_name(VERSION_TABLE_NAME) @@ -377,15 +380,16 @@ 
def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: for name, column in table_update.items(): assert column.items() >= TABLE_UPDATE_COLUMNS_SCHEMA[name].items() # now get the actual schema from the db - exists, storage_table = client.get_storage_table(table_name) - assert exists is True + _, storage_table = list(client.get_storage_tables([table_name]))[0] + assert len(storage_table) > 0 print(storage_table) # column order must match TABLE_UPDATE storage_columns = list(storage_table.values()) for c, expected_c in zip(TABLE_UPDATE, storage_columns): # print(c["name"]) # print(c["data_type"]) - assert c["name"] == expected_c["name"] + # storage columns are returned with column names as in information schema + assert client.capabilities.casefold_identifier(c["name"]) == expected_c["name"] # athena does not know wei data type and has no JSON type, time is not supported with parquet tables if client.config.destination_type == "athena" and c["data_type"] in ( "wei", diff --git a/tests/load/utils.py b/tests/load/utils.py index b08d5f2d6a..f81919518c 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -499,6 +499,7 @@ def yield_client( schema_storage = SchemaStorage(storage_config) schema = schema_storage.load_schema(schema_name) schema.update_normalizers() + # NOTE: schema version is bumped because new default hints are added schema.bump_version() # create client and dataset client: SqlJobClientBase = None diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 48153f7706..45868621e7 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -13,7 +13,7 @@ from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient from dlt.common.storages.file_storage import FileStorage -from dlt.common.schema.utils import new_table +from dlt.common.schema.utils import new_table, normalize_table_identifiers from tests.load.utils import ( TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE, @@ -111,7 +111,9 @@ def test_case_sensitive_properties_create(client: WeaviateClient) -> None: {"name": "coL1", "data_type": "double", "nullable": False}, ] client.schema.update_table( - client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) + normalize_table_identifiers( + new_table(class_name, columns=table_create), client.schema.naming + ) ) client.schema.bump_version() with pytest.raises(PropertyNameConflict): @@ -126,7 +128,9 @@ def test_case_insensitive_properties_create(ci_client: WeaviateClient) -> None: {"name": "coL1", "data_type": "double", "nullable": False}, ] ci_client.schema.update_table( - ci_client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) + normalize_table_identifiers( + new_table(class_name, columns=table_create), ci_client.schema.naming + ) ) ci_client.schema.bump_version() ci_client.update_stored_schema() @@ -143,13 +147,17 @@ def test_case_sensitive_properties_add(client: WeaviateClient) -> None: {"name": "coL1", "data_type": "double", "nullable": False}, ] client.schema.update_table( - client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) + normalize_table_identifiers( + new_table(class_name, columns=table_create), client.schema.naming + ) ) client.schema.bump_version() client.update_stored_schema() client.schema.update_table( - client.schema.normalize_table_identifiers(new_table(class_name, columns=table_update)) + normalize_table_identifiers( + 
new_table(class_name, columns=table_update), client.schema.naming + ) ) client.schema.bump_version() with pytest.raises(PropertyNameConflict): diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 0cebeb2ff7..acc4c56a5c 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -46,6 +46,7 @@ from tests.utils import TEST_STORAGE_ROOT from tests.extract.utils import expect_extracted_file from tests.pipeline.utils import ( + assert_data_table_counts, assert_load_info, airtable_emojis, load_data_table_counts, @@ -1279,21 +1280,27 @@ def autodetect(): name="numbers", ) + source = autodetect() pipeline = dlt.pipeline(destination="duckdb") - pipeline.run(autodetect()) + pipeline.run(source) # unix ts recognized assert ( pipeline.default_schema.get_table("numbers")["columns"]["value"]["data_type"] == "timestamp" ) + assert "timestamp" in source.schema.settings["detections"] + assert "timestamp" in pipeline.default_schema.settings["detections"] pipeline = pipeline.drop() source = autodetect() + assert "timestamp" in source.schema.settings["detections"] source.schema.remove_type_detection("timestamp") + assert "timestamp" not in source.schema.settings["detections"] pipeline = dlt.pipeline(destination="duckdb") pipeline.run(source) + assert "timestamp" not in pipeline.default_schema.settings["detections"] assert pipeline.default_schema.get_table("numbers")["columns"]["value"]["data_type"] == "bigint" @@ -1715,7 +1722,7 @@ def source(): assert len(load_info.loads_ids) == 1 -def test_pipeline_load_info_metrics_schema_is_not_chaning() -> None: +def test_pipeline_load_info_metrics_schema_is_not_changing() -> None: """Test if load info schema is idempotent throughout multiple load cycles ## Setup @@ -1771,7 +1778,6 @@ def demand_map(): pipeline_name="quick_start", destination="duckdb", dataset_name="mydata", - # export_schema_path="schemas", ) taxi_load_info = pipeline.run( @@ -1857,3 +1863,23 @@ def demand_map(): schema_hashset.add(pipeline.schemas["nice_load_info_schema"].version_hash) assert len(schema_hashset) == 1 + + +@pytest.mark.skip(reason="empty lists are removed in normalized. 
to be fixed") +def test_yielding_empty_list_creates_table() -> None: + pipeline = dlt.pipeline( + pipeline_name="empty_start", + destination="duckdb", + dataset_name="mydata", + ) + + # empty list should create empty table in the destination but with the required schema + extract_info = pipeline.extract( + [[]], table_name="empty", columns=[{"name": "id", "data_type": "bigint", "nullable": True}] + ) + print(extract_info) + normalize_info = pipeline.normalize() + assert normalize_info.row_counts["empty"] == 0 + load_info = pipeline.load() + assert_load_info(load_info) + assert_data_table_counts(pipeline, {"empty": 0}) From e50bfaaf55cb57f78dac7e1ffe45ba60227006d0 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:30:30 +0100 Subject: [PATCH 029/105] moves schema verification to destination utils --- dlt/common/destination/reference.py | 90 +++--------------------- dlt/common/destination/utils.py | 105 ++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 80 deletions(-) create mode 100644 dlt/common/destination/utils.py diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 5e698347e5..d9f104dcee 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -25,19 +25,15 @@ import inspect from dlt.common import logger +from dlt.common.destination.utils import verify_schema_capabilities from dlt.common.exceptions import ( - IdentifierTooLongException, InvalidDestinationReference, UnknownDestinationModule, ) from dlt.common.schema import Schema, TTableSchema, TSchemaTables -from dlt.common.schema.exceptions import SchemaException from dlt.common.schema.utils import ( get_write_disposition, get_table_format, - get_columns_names_with_prop, - has_column_with_prop, - get_first_column_name_with_prop, ) from dlt.common.configuration import configspec, resolve_configuration, known_sections from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration @@ -334,79 +330,13 @@ def __exit__( pass def _verify_schema(self) -> None: - """Verifies and cleans up a schema before loading - - * Checks all table and column name lengths against destination capabilities and raises on too long identifiers - * Removes and warns on (unbound) incomplete columns - """ - - for table in self.schema.data_tables(): - table_name = table["name"] - if len(table_name) > self.capabilities.max_identifier_length: - raise IdentifierTooLongException( - self.config.destination_type, - "table", - table_name, - self.capabilities.max_identifier_length, - ) - if has_column_with_prop(table, "hard_delete"): - if len(get_columns_names_with_prop(table, "hard_delete")) > 1: - raise SchemaException( - f'Found multiple "hard_delete" column hints for table "{table_name}" in' - f' schema "{self.schema.name}" while only one is allowed:' - f' {", ".join(get_columns_names_with_prop(table, "hard_delete"))}.' - ) - if table.get("write_disposition") in ("replace", "append"): - logger.warning( - f"""The "hard_delete" column hint for column "{get_first_column_name_with_prop(table, 'hard_delete')}" """ - f'in table "{table_name}" with write disposition' - f' "{table.get("write_disposition")}"' - f' in schema "{self.schema.name}" will be ignored.' - ' The "hard_delete" column hint is only applied when using' - ' the "merge" write disposition.' 
- ) - if has_column_with_prop(table, "dedup_sort"): - if len(get_columns_names_with_prop(table, "dedup_sort")) > 1: - raise SchemaException( - f'Found multiple "dedup_sort" column hints for table "{table_name}" in' - f' schema "{self.schema.name}" while only one is allowed:' - f' {", ".join(get_columns_names_with_prop(table, "dedup_sort"))}.' - ) - if table.get("write_disposition") in ("replace", "append"): - logger.warning( - f"""The "dedup_sort" column hint for column "{get_first_column_name_with_prop(table, 'dedup_sort')}" """ - f'in table "{table_name}" with write disposition' - f' "{table.get("write_disposition")}"' - f' in schema "{self.schema.name}" will be ignored.' - ' The "dedup_sort" column hint is only applied when using' - ' the "merge" write disposition.' - ) - if table.get("write_disposition") == "merge" and not has_column_with_prop( - table, "primary_key" - ): - logger.warning( - f"""The "dedup_sort" column hint for column "{get_first_column_name_with_prop(table, 'dedup_sort')}" """ - f'in table "{table_name}" with write disposition' - f' "{table.get("write_disposition")}"' - f' in schema "{self.schema.name}" will be ignored.' - ' The "dedup_sort" column hint is only applied when a' - " primary key has been specified." - ) - for column_name, column in dict(table["columns"]).items(): - if len(column_name) > self.capabilities.max_column_identifier_length: - raise IdentifierTooLongException( - self.config.destination_type, - "column", - f"{table_name}.{column_name}", - self.capabilities.max_column_identifier_length, - ) - if not is_complete_column(column): - logger.warning( - f"A column {column_name} in table {table_name} in schema" - f" {self.schema.name} is incomplete. It was not bound to the data during" - " normalizations stage and its data type is unknown. Did you add this" - " column manually in code ie. as a merge key?" - ) + """Verifies schema before loading""" + if exceptions := verify_schema_capabilities( + self.schema, self.capabilities, self.config.destination_type, warnings=False + ): + for exception in exceptions: + logger.error(str(exception)) + raise exceptions[0] def prepare_load_table( self, table_name: str, prepare_for_staging: bool = False @@ -421,7 +351,7 @@ def prepare_load_table( table["table_format"] = get_table_format(self.schema.tables, table_name) return table except KeyError: - raise UnknownTableException(table_name) + raise UnknownTableException(self.schema.name, table_name) class WithStateSync(ABC): @@ -539,7 +469,7 @@ def to_name(ref: TDestinationReferenceArg) -> str: @staticmethod def normalize_type(destination_type: str) -> str: - """Normalizes destination type string into a canonical form. Assumes that type names without dots correspond to build in destinations.""" + """Normalizes destination type string into a canonical form. Assumes that type names without dots correspond to built in destinations.""" if "." not in destination_type: destination_type = "dlt.destinations." + destination_type # the next two lines shorten the dlt internal destination paths to dlt.destinations. 
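The clash detection implemented in the new `dlt/common/destination/utils.py` below combines the destination's casefolding function with its case-sensitivity flag. A small illustrative sketch, using assumed capability values rather than a real DestinationCapabilitiesContext: on a destination that lowercases identifiers and compares them case-insensitively, two column names differing only in case normalize to the same identifier and are reported as a clash.

# assumed capability values for illustration (e.g. a Redshift-like destination)
casefold_identifier = str.lower
has_case_sensitive_identifiers = False

def case_identifier(ident: str) -> str:
    # mirrors the combined casing function used by verify_schema_capabilities below
    return casefold_identifier(ident if has_case_sensitive_identifiers else ident.casefold())

name_lookup = {}
for column_name in ("Value", "value"):
    cased = case_identifier(column_name)
    if cased in name_lookup:
        print(f"column {column_name} clashes with {name_lookup[cased]} after normalization to {cased}")
    name_lookup[cased] = column_name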
diff --git a/dlt/common/destination/utils.py b/dlt/common/destination/utils.py new file mode 100644 index 0000000000..f79a2c9bf0 --- /dev/null +++ b/dlt/common/destination/utils.py @@ -0,0 +1,105 @@ +from typing import List + +from dlt.common import logger +from dlt.common.exceptions import IdentifierTooLongException +from dlt.common.schema import Schema +from dlt.common.schema.exceptions import SchemaCorruptedException +from dlt.common.schema.typing import TTableSchema +from dlt.common.schema.utils import ( + is_complete_column, +) +from dlt.common.typing import DictStrStr + +from .capabilities import DestinationCapabilitiesContext + + +def verify_schema_capabilities( + schema: Schema, + capabilities: DestinationCapabilitiesContext, + destination_type: str, + warnings: bool = True, +) -> List[Exception]: + """Verifies schema tables before loading against capabilities. Returns a list of exceptions representing critical problems with the schema. + It will log warnings by default. It is up to the caller to eventually raise exception + + * Checks all table and column name lengths against destination capabilities and raises on too long identifiers + * Checks if schema has clashes due to case sensitivity of the identifiers + """ + + log = logger.warning if warnings else logger.info + # collect all exceptions to show all problems in the schema + exception_log: List[Exception] = [] + # combined casing function + case_identifier = lambda ident: capabilities.casefold_identifier( + (str if capabilities.has_case_sensitive_identifiers else str.casefold)(ident) # type: ignore + ) + table_name_lookup: DictStrStr = {} + # name clash explanation + clash_msg = "Destination is case " + ( + "sensitive" if capabilities.has_case_sensitive_identifiers else "insensitive" + ) + if capabilities.casefold_identifier is not str: + clash_msg += ( + f" but it uses {capabilities.casefold_identifier} to generate case insensitive" + " identifiers. You may try to change the destination capabilities by changing the" + " `casefold_identifier` to `str`" + ) + + # check for any table clashes + for table in schema.data_tables(): + table_name = table["name"] + # detect table name conflict + cased_table_name = case_identifier(table_name) + if cased_table_name in table_name_lookup: + conflict_table_name = table_name_lookup[cased_table_name] + exception_log.append( + SchemaCorruptedException( + schema.name, + f"A table name {table_name} clashes with {conflict_table_name} after" + f" normalization to {cased_table_name}. " + + clash_msg, + ) + ) + table_name_lookup[cased_table_name] = table_name + if len(table_name) > capabilities.max_identifier_length: + exception_log.append( + IdentifierTooLongException( + destination_type, + "table", + table_name, + capabilities.max_identifier_length, + ) + ) + + column_name_lookup: DictStrStr = {} + for column_name, column in dict(table["columns"]).items(): + # detect table name conflict + cased_column_name = case_identifier(column_name) + if cased_column_name in column_name_lookup: + conflict_column_name = column_name_lookup[cased_column_name] + exception_log.append( + SchemaCorruptedException( + schema.name, + f"A column name {column_name} in table {table_name} clashes with" + f" {conflict_column_name} after normalization to {cased_column_name}. 
" + + clash_msg, + ) + ) + column_name_lookup[cased_column_name] = column_name + if len(column_name) > capabilities.max_column_identifier_length: + exception_log.append( + IdentifierTooLongException( + destination_type, + "column", + f"{table_name}.{column_name}", + capabilities.max_column_identifier_length, + ) + ) + if not is_complete_column(column): + log( + f"A column {column_name} in table {table_name} in schema" + f" {schema.name} is incomplete. It was not bound to the data during" + " normalizations stage and its data type is unknown. Did you add this" + " column manually in code ie. as a merge key?" + ) + return exception_log From 42d149f943eb237932f403f5f8020f6b92e1acd6 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:34:46 +0100 Subject: [PATCH 030/105] adds method to remove processing hints from schema, helper functions for schema settings, refactor, tests --- dlt/common/normalizers/json/relational.py | 14 +- dlt/common/schema/exceptions.py | 100 +++-- dlt/common/schema/migrations.py | 2 +- dlt/common/schema/schema.py | 396 ++++++++++++------ dlt/common/schema/typing.py | 14 +- dlt/common/schema/utils.py | 116 +++-- dlt/common/utils.py | 11 +- dlt/extract/extractors.py | 11 +- dlt/extract/items.py | 4 + dlt/extract/source.py | 5 +- dlt/normalize/items_normalizers.py | 3 +- dlt/normalize/normalize.py | 2 +- dlt/pipeline/pipeline.py | 6 +- .../website/docs/general-usage/destination.md | 13 +- docs/website/docs/general-usage/schema.md | 103 ++--- .../normalizers/test_json_relational.py | 2 +- tests/common/schema/conftest.py | 25 ++ tests/common/schema/test_filtering.py | 5 - tests/common/schema/test_inference.py | 6 - tests/common/schema/test_merges.py | 53 ++- .../schema/test_normalize_identifiers.py | 338 +++++++++++++++ tests/common/schema/test_schema.py | 271 +++++------- tests/common/schema/test_versioning.py | 1 - 23 files changed, 1046 insertions(+), 455 deletions(-) create mode 100644 tests/common/schema/conftest.py create mode 100644 tests/common/schema/test_normalize_identifiers.py diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index 2297e6404f..86bce509fc 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -265,18 +265,17 @@ def _normalize_row( ) def extend_schema(self) -> None: - # validate config + """Extends Schema with normalizer-specific hints and settings. + + This method is called by Schema when instance is created or restored from storage. + """ config = cast( RelationalNormalizerConfig, self.schema._normalizers_config["json"].get("config") or {} ) DataItemNormalizer._validate_normalizer_config(self.schema, config) - # quick check to see if hints are applied - default_hints = self.schema.settings.get("default_hints") or {} - if "not_null" in default_hints and self.c_dlt_id in default_hints["not_null"]: - return - # add hints - self.schema.merge_hints( + # add hints, do not compile. + self.schema._merge_hints( { "not_null": [ TSimpleRegex(self.c_dlt_id), @@ -298,6 +297,7 @@ def extend_schema(self) -> None: def extend_table(self, table_name: str) -> None: """If the table has a merge write disposition, add propagation info to normalizer + Called by Schema when new table is added to schema or table is updated with partial table. Table name should be normalized. 
""" table = self.schema.tables.get(table_name) diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 7f73bcbf36..40122ed548 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -7,37 +7,45 @@ TSchemaContractEntities, TSchemaEvolutionMode, ) +from dlt.common.normalizers.naming import NamingConvention class SchemaException(DltException): - pass + def __init__(self, schema_name: str, msg: str) -> None: + self.schema_name = schema_name + if schema_name: + msg = f"In schema: {schema_name}: " + msg + super().__init__(msg) class InvalidSchemaName(ValueError, SchemaException): MAXIMUM_SCHEMA_NAME_LENGTH = 64 - def __init__(self, name: str) -> None: - self.name = name + def __init__(self, schema_name: str) -> None: + self.name = schema_name super().__init__( - f"{name} is an invalid schema/source name. The source or schema name must be a valid" - " Python identifier ie. a snake case function name and have maximum" + schema_name, + f"{schema_name} is an invalid schema/source name. The source or schema name must be a" + " valid Python identifier ie. a snake case function name and have maximum" f" {self.MAXIMUM_SCHEMA_NAME_LENGTH} characters. Ideally should contain only small" - " letters, numbers and underscores." + " letters, numbers and underscores.", ) -class InvalidDatasetName(ValueError, SchemaException): - def __init__(self, destination_name: str) -> None: - self.destination_name = destination_name - super().__init__( - f"Destination {destination_name} does not accept empty datasets. Please pass the" - " dataset name to the destination configuration ie. via dlt pipeline." - ) +# TODO: does not look like a SchemaException +# class InvalidDatasetName(ValueError, SchemaException): +# def __init__(self, destination_name: str) -> None: +# self.destination_name = destination_name +# super().__init__( +# f"Destination {destination_name} does not accept empty datasets. Please pass the" +# " dataset name to the destination configuration ie. via dlt pipeline." 
+# ) class CannotCoerceColumnException(SchemaException): def __init__( self, + schema_name: str, table_name: str, column_name: str, from_type: TDataType, @@ -50,37 +58,43 @@ def __init__( self.to_type = to_type self.coerced_value = coerced_value super().__init__( + schema_name, f"Cannot coerce type in table {table_name} column {column_name} existing type" - f" {from_type} coerced type {to_type} value: {coerced_value}" + f" {from_type} coerced type {to_type} value: {coerced_value}", ) class TablePropertiesConflictException(SchemaException): - def __init__(self, table_name: str, prop_name: str, val1: str, val2: str): + def __init__(self, schema_name: str, table_name: str, prop_name: str, val1: str, val2: str): self.table_name = table_name self.prop_name = prop_name self.val1 = val1 self.val2 = val2 super().__init__( + schema_name, f"Cannot merge partial tables for {table_name} due to property {prop_name}: {val1} !=" - f" {val2}" + f" {val2}", ) class ParentTableNotFoundException(SchemaException): - def __init__(self, table_name: str, parent_table_name: str, explanation: str = "") -> None: + def __init__( + self, schema_name: str, table_name: str, parent_table_name: str, explanation: str = "" + ) -> None: self.table_name = table_name self.parent_table_name = parent_table_name super().__init__( + schema_name, f"Parent table {parent_table_name} for {table_name} was not found in the" - f" schema.{explanation}" + f" schema.{explanation}", ) class CannotCoerceNullException(SchemaException): - def __init__(self, table_name: str, column_name: str) -> None: + def __init__(self, schema_name: str, table_name: str, column_name: str) -> None: super().__init__( - f"Cannot coerce NULL in table {table_name} column {column_name} which is not nullable" + schema_name, + f"Cannot coerce NULL in table {table_name} column {column_name} which is not nullable", ) @@ -92,13 +106,13 @@ class SchemaEngineNoUpgradePathException(SchemaException): def __init__( self, schema_name: str, init_engine: int, from_engine: int, to_engine: int ) -> None: - self.schema_name = schema_name self.init_engine = init_engine self.from_engine = from_engine self.to_engine = to_engine super().__init__( + schema_name, f"No engine upgrade path in schema {schema_name} from {init_engine} to {to_engine}," - f" stopped at {from_engine}" + f" stopped at {from_engine}", ) @@ -131,8 +145,7 @@ def __init__( + f" . Contract on {schema_entity} with mode {contract_mode} is violated. 
" + (extended_info or "") ) - super().__init__(msg) - self.schema_name = schema_name + super().__init__(schema_name, msg) self.table_name = table_name self.column_name = column_name @@ -146,7 +159,40 @@ def __init__( self.data_item = data_item -class UnknownTableException(SchemaException): - def __init__(self, table_name: str) -> None: +class UnknownTableException(KeyError, SchemaException): + def __init__(self, schema_name: str, table_name: str) -> None: self.table_name = table_name - super().__init__(f"Trying to access unknown table {table_name}.") + super().__init__(schema_name, f"Trying to access unknown table {table_name}.") + + +class TableIdentifiersFrozen(SchemaException): + def __init__( + self, + schema_name: str, + table_name: str, + to_naming: NamingConvention, + from_naming: NamingConvention, + details: str, + ) -> None: + self.table_name = table_name + self.to_naming = to_naming + self.from_naming = from_naming + msg = ( + f"Attempt to normalize identifiers for a table {table_name} from naming" + f" {str(type(from_naming))} to {str(type(to_naming))} changed one or more identifiers. " + ) + msg += ( + " This table already received data and tables were created at the destination. By" + " default changing the identifiers is not allowed. " + ) + msg += ( + " Such changes may result in creation of a new table or a new columns while the old" + " columns with data will still be kept. " + ) + msg += ( + " You may disable this behavior by setting" + " schema.allow_identifier_change_on_table_with_data to True or removing `x-normalizer`" + " hints from particular tables. " + ) + msg += f" Details: {details}" + super().__init__(schema_name, msg) diff --git a/dlt/common/schema/migrations.py b/dlt/common/schema/migrations.py index 9b206d61a6..1b644f2514 100644 --- a/dlt/common/schema/migrations.py +++ b/dlt/common/schema/migrations.py @@ -1,7 +1,7 @@ from typing import Dict, List, cast from dlt.common.data_types import TDataType -from dlt.common.normalizers import explicit_normalizers +from dlt.common.normalizers.utils import explicit_normalizers from dlt.common.typing import DictStrAny from dlt.common.schema.typing import ( LOADS_TABLE_NAME, diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 4a5040dbe3..cda60ed985 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -1,10 +1,21 @@ import yaml from copy import copy, deepcopy -from typing import ClassVar, Dict, List, Mapping, Optional, Sequence, Tuple, Any, cast, Literal +from typing import ( + Callable, + ClassVar, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + Any, + cast, +) from dlt.common import json from dlt.common.schema.migrations import migrate_schema -from dlt.common.utils import extend_list_deduplicated +from dlt.common.utils import extend_list_deduplicated, get_full_class_name from dlt.common.typing import ( DictStrAny, StrAny, @@ -13,8 +24,8 @@ VARIANT_FIELD_FORMAT, TDataItem, ) -from dlt.common.normalizers import TNormalizersConfig, explicit_normalizers, import_normalizers -from dlt.common.normalizers.naming import NamingConvention +from dlt.common.normalizers import TNormalizersConfig, NamingConvention +from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers from dlt.common.normalizers.json import DataItemNormalizer, TNormalizedRowIterator from dlt.common.schema import utils from dlt.common.data_types import py_type_to_sc_type, coerce_value, TDataType @@ -47,6 +58,7 @@ InvalidSchemaName, ParentTableNotFoundException, 
SchemaCorruptedException, + TableIdentifiersFrozen, ) from dlt.common.validation import validate_dict from dlt.common.schema.exceptions import DataValidationError @@ -102,13 +114,18 @@ def __init__(self, name: str, normalizers: TNormalizersConfig = None) -> None: self._reset_schema(name, normalizers) @classmethod - def from_dict(cls, d: DictStrAny, bump_version: bool = True) -> "Schema": + def from_dict( + cls, d: DictStrAny, remove_processing_hints: bool = False, bump_version: bool = True + ) -> "Schema": # upgrade engine if needed stored_schema = migrate_schema(d, d["engine_version"], cls.ENGINE_VERSION) # verify schema utils.validate_stored_schema(stored_schema) # add defaults stored_schema = utils.apply_defaults(stored_schema) + # remove processing hints that could be created by normalize and load steps + if remove_processing_hints: + utils.remove_processing_hints(stored_schema["tables"]) # bump version if modified if bump_version: @@ -143,30 +160,6 @@ def replace_schema_content( self._reset_schema(schema.name, schema._normalizers_config) self._from_stored_schema(stored_schema) - def to_dict(self, remove_defaults: bool = False, bump_version: bool = True) -> TStoredSchema: - stored_schema: TStoredSchema = { - "version": self._stored_version, - "version_hash": self._stored_version_hash, - "engine_version": Schema.ENGINE_VERSION, - "name": self._schema_name, - "tables": self._schema_tables, - "settings": self._settings, - "normalizers": self._normalizers_config, - "previous_hashes": self._stored_previous_hashes, - } - if self._imported_version_hash and not remove_defaults: - stored_schema["imported_version_hash"] = self._imported_version_hash - if self._schema_description: - stored_schema["description"] = self._schema_description - - # bump version if modified - if bump_version: - utils.bump_version_if_modified(stored_schema) - # remove defaults after bumping version - if remove_defaults: - utils.remove_defaults(stored_schema) - return stored_schema - def normalize_data_item( self, item: TDataItem, load_id: str, table_name: str ) -> TNormalizedRowIterator: @@ -319,7 +312,7 @@ def apply_schema_contract( column_mode, data_mode = schema_contract["columns"], schema_contract["data_type"] # allow to add new columns when table is new or if columns are allowed to evolve once - if is_new_table or existing_table.get("x-normalizer", {}).get("evolve-columns-once", False): # type: ignore[attr-defined] + if is_new_table or existing_table.get("x-normalizer", {}).get("evolve-columns-once", False): column_mode = "evolve" # check if we should filter any columns, partial table below contains only new columns @@ -409,7 +402,7 @@ def update_table( ) -> TPartialTableSchema: """Adds or merges `partial_table` into the schema. 
Identifiers are normalized by default""" if normalize_identifiers: - partial_table = self.normalize_table_identifiers(partial_table) + partial_table = utils.normalize_table_identifiers(partial_table, self.naming) table_name = partial_table["name"] parent_table_name = partial_table.get("parent") @@ -417,6 +410,7 @@ def update_table( if parent_table_name is not None: if self._schema_tables.get(parent_table_name) is None: raise ParentTableNotFoundException( + self.name, table_name, parent_table_name, " This may be due to misconfigured excludes filter that fully deletes content" @@ -429,7 +423,7 @@ def update_table( self._schema_tables[table_name] = partial_table else: # merge tables performing additional checks - partial_table = utils.merge_tables(table, partial_table) + partial_table = utils.merge_tables(self.name, table, partial_table) self.data_item_normalizer.extend_table(table_name) return partial_table @@ -480,68 +474,55 @@ def merge_hints( new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]], normalize_identifiers: bool = True, ) -> None: - """Merges existing default hints with `new_hint`. Normalizes names in column regexes if possible""" - if normalize_identifiers: - new_hints = self._normalize_default_hints(new_hints) - # validate regexes - validate_dict( - TSchemaSettings, - {"default_hints": new_hints}, - ".", - validator_f=utils.simple_regex_validator, - ) - # prepare hints to be added - default_hints = self._settings.setdefault("default_hints", {}) - # add `new_hints` to existing hints - for h, l in new_hints.items(): - if h in default_hints: - extend_list_deduplicated(default_hints[h], l) - else: - # set new hint type - default_hints[h] = l # type: ignore - self._compile_settings() + """Merges existing default hints with `new_hints`. Normalizes names in column regexes if possible. Compiles setting at the end - def normalize_table_identifiers(self, table: TTableSchema) -> TTableSchema: - """Normalizes all table and column names in `table` schema according to current schema naming convention and returns - new normalized TTableSchema instance. + NOTE: you can manipulate default hints collection directly via `Schema.settings` as long as you call Schema._compile_settings() at the end. + """ + self._merge_hints(new_hints, normalize_identifiers) + self._compile_settings() - Naming convention like snake_case may produce name clashes with the column names. Clashing column schemas are merged - where the column that is defined later in the dictionary overrides earlier column. + def update_preferred_types( + self, + new_preferred_types: Mapping[TSimpleRegex, TDataType], + normalize_identifiers: bool = True, + ) -> None: + """Updates preferred types dictionary with `new_preferred_types`. Normalizes names in column regexes if possible. Compiles setting at the end - Note that resource name is not normalized. + NOTE: you can manipulate preferred hints collection directly via `Schema.settings` as long as you call Schema._compile_settings() at the end. 
""" - # normalize all identifiers in table according to name normalizer of the schema - table["name"] = self.naming.normalize_tables_path(table["name"]) - parent = table.get("parent") - if parent: - table["parent"] = self.naming.normalize_tables_path(parent) - columns = table.get("columns") - if columns: - new_columns: TTableSchemaColumns = {} - for c in columns.values(): - new_col_name = c["name"] = self.naming.normalize_path(c["name"]) - # re-index columns as the name changed, if name space was reduced then - # some columns now clash with each other. so make sure that we merge columns that are already there - if new_col_name in new_columns: - new_columns[new_col_name] = utils.merge_columns( - new_columns[new_col_name], c, merge_defaults=False - ) - else: - new_columns[new_col_name] = c - table["columns"] = new_columns - return table + self._update_preferred_types(new_preferred_types, normalize_identifiers) + self._compile_settings() + + def add_type_detection(self, detection: TTypeDetections) -> None: + """Add type auto detection to the schema.""" + if detection not in self.settings["detections"]: + self.settings["detections"].append(detection) + self._compile_settings() + + def remove_type_detection(self, detection: TTypeDetections) -> None: + """Adds type auto detection to the schema.""" + if detection in self.settings["detections"]: + self.settings["detections"].remove(detection) + self._compile_settings() def get_new_table_columns( self, table_name: str, - exiting_columns: TTableSchemaColumns, + existing_columns: TTableSchemaColumns, + case_sensitive: bool = True, include_incomplete: bool = False, ) -> List[TColumnSchema]: - """Gets new columns to be added to `exiting_columns` to bring them up to date with `table_name` schema. Optionally includes incomplete columns (without data type)""" + """Gets new columns to be added to `existing_columns` to bring them up to date with `table_name` schema. + Columns names are compared case sensitive by default. 
+ Optionally includes incomplete columns (without data type)""" + casefold_f: Callable[[str], str] = str.casefold if not case_sensitive else str # type: ignore[assignment] + casefold_existing = { + casefold_f(col_name): col for col_name, col in existing_columns.items() + } diff_c: List[TColumnSchema] = [] s_t = self.get_table_columns(table_name, include_incomplete=include_incomplete) for c in s_t.values(): - if c["name"] not in exiting_columns: + if casefold_f(c["name"]) not in casefold_existing: diff_c.append(c) return diff_c @@ -646,20 +627,75 @@ def tables(self) -> TSchemaTables: def settings(self) -> TSchemaSettings: return self._settings - def to_pretty_json(self, remove_defaults: bool = True) -> str: - d = self.to_dict(remove_defaults=remove_defaults) + def to_dict( + self, + remove_defaults: bool = False, + remove_processing_hints: bool = False, + bump_version: bool = True, + ) -> TStoredSchema: + # prepare normalizers + if isinstance(self._normalizers_config["names"], NamingConvention): + normalizers_config = deepcopy(self._normalizers_config) + normalizers_config["names"] = get_full_class_name(normalizers_config["names"]) + else: + normalizers_config = self._normalizers_config + + stored_schema: TStoredSchema = { + "version": self._stored_version, + "version_hash": self._stored_version_hash, + "engine_version": Schema.ENGINE_VERSION, + "name": self._schema_name, + "tables": self._schema_tables, + "settings": self._settings, + "normalizers": normalizers_config, + "previous_hashes": self._stored_previous_hashes, + } + if self._imported_version_hash and not remove_defaults: + stored_schema["imported_version_hash"] = self._imported_version_hash + if self._schema_description: + stored_schema["description"] = self._schema_description + + # remove processing hints that could be created by normalize and load steps + if remove_processing_hints: + stored_schema["tables"] = utils.remove_processing_hints( + deepcopy(stored_schema["tables"]) + ) + + # bump version if modified + if bump_version: + utils.bump_version_if_modified(stored_schema) + # remove defaults after bumping version + if remove_defaults: + utils.remove_defaults(stored_schema) + return stored_schema + + def to_pretty_json( + self, remove_defaults: bool = True, remove_processing_hints: bool = False + ) -> str: + d = self.to_dict( + remove_defaults=remove_defaults, remove_processing_hints=remove_processing_hints + ) return json.dumps(d, pretty=True) - def to_pretty_yaml(self, remove_defaults: bool = True) -> str: - d = self.to_dict(remove_defaults=remove_defaults) + def to_pretty_yaml( + self, remove_defaults: bool = True, remove_processing_hints: bool = False + ) -> str: + d = self.to_dict( + remove_defaults=remove_defaults, remove_processing_hints=remove_processing_hints + ) return yaml.dump(d, allow_unicode=True, default_flow_style=False, sort_keys=False) - def clone(self, with_name: str = None, update_normalizers: bool = False) -> "Schema": - """Make a deep copy of the schema, optionally changing the name, and updating normalizers and identifiers in the schema if `update_normalizers` is True - + def clone( + self, + with_name: str = None, + remove_processing_hints: bool = False, + update_normalizers: bool = False, + ) -> "Schema": + """Make a deep copy of the schema, optionally changing the name, removing processing markers and updating normalizers and identifiers in the schema if `update_normalizers` is True + Processing markers are `x-` hints created by normalizer (`x-normalizer`) and loader (`x-loader`) to ie. 
mark newly inferred tables and tables that seen data. Note that changing of name will break the previous version chain """ - d = deepcopy(self.to_dict()) + d = deepcopy(self.to_dict(remove_processing_hints=remove_processing_hints)) if with_name is not None: d["name"] = with_name d["previous_hashes"] = [] @@ -670,7 +706,13 @@ def clone(self, with_name: str = None, update_normalizers: bool = False) -> "Sch return schema def update_normalizers(self) -> None: - """Looks for new normalizer configuration or for destination capabilities context and updates all identifiers in the schema""" + """Looks for new normalizer configuration or for destination capabilities context and updates all identifiers in the schema + + Table and column names will be normalized with new naming convention, except tables that have seen data ('x-normalizer`) which will + raise if any identifier is to be changed. + Default hints, preferred data types and normalize configs (ie. column propagation) are normalized as well. Regexes are included as long + as textual parts can be extracted from an expression. + """ normalizers = explicit_normalizers() # set the current values as defaults normalizers["names"] = normalizers["names"] or self._normalizers_config["names"] @@ -684,18 +726,6 @@ def set_schema_contract(self, settings: TSchemaContract) -> None: else: self._settings["schema_contract"] = settings - def add_type_detection(self, detection: TTypeDetections) -> None: - """Add type auto detection to the schema.""" - if detection not in self.settings["detections"]: - self.settings["detections"].append(detection) - self._compile_settings() - - def remove_type_detection(self, detection: TTypeDetections) -> None: - """Adds type auto detection to the schema.""" - if detection in self.settings["detections"]: - self.settings["detections"].remove(detection) - self._compile_settings() - def _infer_column( self, k: str, v: Any, data_type: TDataType = None, is_variant: bool = False ) -> TColumnSchema: @@ -721,7 +751,7 @@ def _coerce_null_value( if col_name in table_columns: existing_column = table_columns[col_name] if not existing_column.get("nullable", True): - raise CannotCoerceNullException(table_name, col_name) + raise CannotCoerceNullException(self.name, table_name, col_name) def _coerce_non_null_value( self, @@ -753,7 +783,12 @@ def _coerce_non_null_value( if is_variant: # this is final call: we cannot generate any more auto-variants raise CannotCoerceColumnException( - table_name, col_name, py_type, table_columns[col_name]["data_type"], v + self.name, + table_name, + col_name, + py_type, + table_columns[col_name]["data_type"], + v, ) # otherwise we must create variant extension to the table # pass final=True so no more auto-variants can be created recursively @@ -810,18 +845,69 @@ def _infer_hint(self, hint_type: TColumnHint, _: Any, col_name: str) -> bool: else: return False + def _merge_hints( + self, + new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]], + normalize_identifiers: bool = True, + ) -> None: + """Used by `merge_hints method, does not compile settings at the end""" + # validate regexes + validate_dict( + TSchemaSettings, + {"default_hints": new_hints}, + ".", + validator_f=utils.simple_regex_validator, + ) + if normalize_identifiers: + new_hints = self._normalize_default_hints(new_hints) + # prepare hints to be added + default_hints = self._settings.setdefault("default_hints", {}) + # add `new_hints` to existing hints + for h, l in new_hints.items(): + if h in default_hints: + 
extend_list_deduplicated(default_hints[h], l, utils.canonical_simple_regex) + else: + # set new hint type + default_hints[h] = l # type: ignore + + def _update_preferred_types( + self, + new_preferred_types: Mapping[TSimpleRegex, TDataType], + normalize_identifiers: bool = True, + ) -> None: + # validate regexes + validate_dict( + TSchemaSettings, + {"preferred_types": new_preferred_types}, + ".", + validator_f=utils.simple_regex_validator, + ) + if normalize_identifiers: + new_preferred_types = self._normalize_preferred_types(new_preferred_types) + preferred_types = self._settings.setdefault("preferred_types", {}) + # we must update using canonical simple regex + canonical_preferred = { + utils.canonical_simple_regex(rx): rx for rx in preferred_types.keys() + } + for new_rx, new_dt in new_preferred_types.items(): + canonical_new_rx = utils.canonical_simple_regex(new_rx) + if canonical_new_rx not in canonical_preferred: + preferred_types[new_rx] = new_dt + else: + preferred_types[canonical_preferred[canonical_new_rx]] = new_dt + def _add_standard_tables(self) -> None: - self._schema_tables[self.version_table_name] = self.normalize_table_identifiers( - utils.version_table() + self._schema_tables[self.version_table_name] = utils.normalize_table_identifiers( + utils.version_table(), self.naming ) - self._schema_tables[self.loads_table_name] = self.normalize_table_identifiers( - utils.load_table() + self._schema_tables[self.loads_table_name] = utils.normalize_table_identifiers( + utils.load_table(), self.naming ) def _add_standard_hints(self) -> None: default_hints = utils.default_hints() if default_hints: - self.merge_hints(default_hints) + self._merge_hints(default_hints, normalize_identifiers=False) type_detections = utils.standard_type_detections() if type_detections: self._settings["detections"] = type_detections @@ -836,7 +922,7 @@ def _normalize_default_hints( } def _normalize_preferred_types( - self, preferred_types: Dict[TSimpleRegex, TDataType] + self, preferred_types: Mapping[TSimpleRegex, TDataType] ) -> Dict[TSimpleRegex, TDataType]: """Normalizes the column names in preferred types mapping. 
In case of column names that are regexes, normalization is skipped""" return { @@ -844,31 +930,83 @@ def _normalize_preferred_types( for regex, data_type in preferred_types.items() } - def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: - # import desired modules - self._normalizers_config, naming_module, item_normalizer_class = import_normalizers( - normalizers - ) - # print(f"{self.name}: {type(self.naming)} {type(naming_module)}") - if self.naming and type(self.naming) is not type(naming_module): - self.naming = naming_module + def _verify_update_normalizers( + self, + normalizers_config: TNormalizersConfig, + to_naming: NamingConvention, + from_naming: NamingConvention, + ) -> TSchemaTables: + """Verifies if normalizers can be updated before schema is changed""" + # print(f"{self.name}: {type(to_naming)} {type(naming_module)}") + if from_naming and type(from_naming) is not type(to_naming): + schema_tables = {} for table in self._schema_tables.values(): - self.normalize_table_identifiers(table) + norm_table = utils.normalize_table_identifiers(table, to_naming) + if utils.has_table_seen_data(norm_table) and not normalizers_config.get( + "allow_identifier_change_on_table_with_data", False + ): + # make sure no identifier got changed in table + if norm_table["name"] != table["name"]: + raise TableIdentifiersFrozen( + self.name, + table["name"], + to_naming, + from_naming, + f"Attempt to rename table name to {norm_table['name']}.", + ) + if len(norm_table["columns"]) != len(table["columns"]): + raise TableIdentifiersFrozen( + self.name, + table["name"], + to_naming, + from_naming, + "Number of columns changed after normalization. Some columns must have" + " merged.", + ) + col_diff = set(norm_table["columns"].keys()).difference(table["columns"].keys()) + if len(col_diff) > 0: + raise TableIdentifiersFrozen( + self.name, + table["name"], + to_naming, + from_naming, + f"Some columns got renamed to {col_diff}.", + ) + schema_tables[norm_table["name"]] = norm_table # re-index the table names - self._schema_tables = {t["name"]: t for t in self._schema_tables.values()} + return schema_tables + else: + return self._schema_tables + def _renormalize_schema_identifiers( + self, + normalizers_config: TNormalizersConfig, + to_naming: NamingConvention, + from_naming: NamingConvention, + ) -> None: + """Normalizes all identifiers in the schema in place""" + self._schema_tables = self._verify_update_normalizers( + normalizers_config, to_naming, from_naming + ) + self._normalizers_config = normalizers_config + self.naming = to_naming # name normalization functions - self.naming = naming_module - self._dlt_tables_prefix = self.naming.normalize_table_identifier(DLT_NAME_PREFIX) - self.version_table_name = self.naming.normalize_table_identifier(VERSION_TABLE_NAME) - self.loads_table_name = self.naming.normalize_table_identifier(LOADS_TABLE_NAME) - self.state_table_name = self.naming.normalize_table_identifier(PIPELINE_STATE_TABLE_NAME) + self._dlt_tables_prefix = to_naming.normalize_table_identifier(DLT_NAME_PREFIX) + self.version_table_name = to_naming.normalize_table_identifier(VERSION_TABLE_NAME) + self.loads_table_name = to_naming.normalize_table_identifier(LOADS_TABLE_NAME) + self.state_table_name = to_naming.normalize_table_identifier(PIPELINE_STATE_TABLE_NAME) # normalize default hints if default_hints := self._settings.get("default_hints"): self._settings["default_hints"] = self._normalize_default_hints(default_hints) # normalized preferred types if preferred_types := 
self.settings.get("preferred_types"): self._settings["preferred_types"] = self._normalize_preferred_types(preferred_types) + + def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: + """Gets naming and item normalizer from schema yaml, config providers and destination capabilities and applies them to schema.""" + # import desired modules + normalizers_config, to_naming, item_normalizer_class = import_normalizers(normalizers) + self._renormalize_schema_identifiers(normalizers_config, to_naming, self.naming) # data item normalization function self.data_item_normalizer = item_normalizer_class(self) self.data_item_normalizer.extend_schema() @@ -911,9 +1049,13 @@ def _reset_schema(self, name: str, normalizers: TNormalizersConfig = None) -> No def _from_stored_schema(self, stored_schema: TStoredSchema) -> None: self._schema_tables = stored_schema.get("tables") or {} if self.version_table_name not in self._schema_tables: - raise SchemaCorruptedException(f"Schema must contain table {self.version_table_name}") + raise SchemaCorruptedException( + stored_schema["name"], f"Schema must contain table {self.version_table_name}" + ) if self.loads_table_name not in self._schema_tables: - raise SchemaCorruptedException(f"Schema must contain table {self.loads_table_name}") + raise SchemaCorruptedException( + stored_schema["name"], f"Schema must contain table {self.loads_table_name}" + ) self._stored_version = stored_schema["version"] self._stored_version_hash = stored_schema["version_hash"] self._imported_version_hash = stored_schema.get("imported_version_hash") diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index c2c71207e6..127f7defa3 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -158,10 +158,20 @@ class NormalizerInfo(TypedDict, total=True): new_table: bool -# TypedDict that defines properties of a table +# Part of Table containing processing hints added by pipeline stages +TTableProcessingHints = TypedDict( + "TTableProcessingHints", + { + "x-normalizer": Optional[Dict[str, Any]], + "x-loader": Optional[Dict[str, Any]], + "x-extractor": Optional[Dict[str, Any]], + }, + total=False, +) -class TTableSchema(TypedDict, total=False): +# TypedDict that defines properties of a table +class TTableSchema(TTableProcessingHints, total=False): """TypedDict that defines properties of a table""" name: Optional[str] diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 369f17b3aa..ae295f15c8 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -5,7 +5,7 @@ from copy import deepcopy, copy from typing import Dict, List, Sequence, Tuple, Type, Any, cast, Iterable, Optional, Union -from dlt.common import json +from dlt.common import json, logger from dlt.common.data_types import TDataType from dlt.common.exceptions import DictValidationException from dlt.common.normalizers.naming import NamingConvention @@ -26,12 +26,14 @@ TSchemaUpdate, TSimpleRegex, TStoredSchema, + TTableProcessingHints, TTableSchema, TColumnSchemaBase, TColumnSchema, TColumnProp, TTableFormat, TColumnHint, + TTableSchemaColumns, TTypeDetectionFunc, TTypeDetections, TWriteDisposition, @@ -92,7 +94,8 @@ def apply_defaults(stored_schema: TStoredSchema) -> TStoredSchema: def remove_defaults(stored_schema: TStoredSchema) -> TStoredSchema: """Removes default values from `stored_schema` in place, returns the input for chaining - Default values are removed from table schemas and complete column schemas. 
Incomplete columns are preserved intact. + * removes column and able names from the value + * removed resource name if same as table name """ clean_tables = deepcopy(stored_schema["tables"]) for table_name, t in clean_tables.items(): @@ -133,22 +136,22 @@ def remove_column_defaults(column_schema: TColumnSchema) -> TColumnSchema: return column_schema -def add_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: - """Adds default boolean hints to column""" - return { - **{ - "nullable": True, - "partition": False, - "cluster": False, - "unique": False, - "sort": False, - "primary_key": False, - "foreign_key": False, - "root_key": False, - "merge_key": False, - }, - **column, - } +# def add_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: +# """Adds default boolean hints to column""" +# return { +# **{ +# "nullable": True, +# "partition": False, +# "cluster": False, +# "unique": False, +# "sort": False, +# "primary_key": False, +# "foreign_key": False, +# "root_key": False, +# "merge_key": False, +# }, +# **column, +# } # def add_complete_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: @@ -241,6 +244,13 @@ def _normalize(r_: str) -> str: return cast(TSimpleRegex, _normalize(regex)) +def canonical_simple_regex(regex: str) -> TSimpleRegex: + if regex.startswith(SIMPLE_REGEX_PREFIX): + return cast(TSimpleRegex, regex) + else: + return cast(TSimpleRegex, SIMPLE_REGEX_PREFIX + "^" + regex + "$") + + def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: # custom validator on type TSimpleRegex if t is TSimpleRegex: @@ -335,7 +345,9 @@ def validate_stored_schema(stored_schema: TStoredSchema) -> None: parent_table_name = table.get("parent") if parent_table_name: if parent_table_name not in stored_schema["tables"]: - raise ParentTableNotFoundException(table_name, parent_table_name) + raise ParentTableNotFoundException( + stored_schema["name"], table_name, parent_table_name + ) def autodetect_sc_type(detection_fs: Sequence[TTypeDetections], t: Type[Any], v: Any) -> TDataType: @@ -375,7 +387,9 @@ def merge_columns( return col_a -def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTableSchema: +def diff_tables( + schema_name: str, tab_a: TTableSchema, tab_b: TPartialTableSchema +) -> TPartialTableSchema: """Creates a partial table that contains properties found in `tab_b` that are not present or different in `tab_a`. The name is always present in returned partial. It returns new columns (not present in tab_a) and merges columns from tab_b into tab_a (overriding non-default hint values). 
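> The `canonical_simple_regex` helper introduced above puts plain column names and `re:` expressions on the same footing, which is what lets the hint-merging code treat `"_dlt_id"` and `"re:^_dlt_id$"` as the same rule. A quick illustrative sketch (not part of the patch) using only functions added or changed in this series — `canonical_simple_regex` and `extend_list_deduplicated` with its new `normalize_f` argument:

```python
from dlt.common.schema.utils import canonical_simple_regex
from dlt.common.utils import extend_list_deduplicated

# plain names are wrapped into an exact-match simple regex; existing "re:" expressions pass through
assert canonical_simple_regex("_dlt_id") == "re:^_dlt_id$"
assert canonical_simple_regex("re:^_dlt_id$") == "re:^_dlt_id$"

# deduplicating on the canonical form avoids adding an equivalent expression twice
hints = ["_dlt_id", "re:_timestamp$"]
extend_list_deduplicated(hints, ["re:^_dlt_id$", "_dlt_load_id"], canonical_simple_regex)
print(hints)  # ['_dlt_id', 're:_timestamp$', '_dlt_load_id']
```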
@@ -389,7 +403,7 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl # check if table properties can be merged if tab_a.get("parent") != tab_b.get("parent"): raise TablePropertiesConflictException( - table_name, "parent", tab_a.get("parent"), tab_b.get("parent") + schema_name, table_name, "parent", tab_a.get("parent"), tab_b.get("parent") ) # get new columns, changes in the column data type or other properties are not allowed @@ -403,6 +417,7 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl if not compare_complete_columns(tab_a_columns[col_b_name], col_b): # attempt to update to incompatible columns raise CannotCoerceColumnException( + schema_name, table_name, col_b_name, col_b["data_type"], @@ -431,7 +446,7 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl # this should not really happen if tab_a.get("parent") is not None and (resource := tab_b.get("resource")): raise TablePropertiesConflictException( - table_name, "resource", resource, tab_a.get("parent") + schema_name, table_name, "resource", resource, tab_a.get("parent") ) return partial_table @@ -449,7 +464,9 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl # return False -def merge_tables(table: TTableSchema, partial_table: TPartialTableSchema) -> TPartialTableSchema: +def merge_tables( + schema_name: str, table: TTableSchema, partial_table: TPartialTableSchema +) -> TPartialTableSchema: """Merges "partial_table" into "table". `table` is merged in place. Returns the diff partial table. `table` and `partial_table` names must be identical. A table diff is generated and applied to `table`: @@ -460,9 +477,9 @@ def merge_tables(table: TTableSchema, partial_table: TPartialTableSchema) -> TPa if table["name"] != partial_table["name"]: raise TablePropertiesConflictException( - table["name"], "name", table["name"], partial_table["name"] + schema_name, table["name"], "name", table["name"], partial_table["name"] ) - diff_table = diff_tables(table, partial_table) + diff_table = diff_tables(schema_name, table, partial_table) # add new columns when all checks passed table["columns"].update(diff_table["columns"]) updated_columns = table["columns"] @@ -472,9 +489,56 @@ def merge_tables(table: TTableSchema, partial_table: TPartialTableSchema) -> TPa return diff_table +def normalize_table_identifiers(table: TTableSchema, naming: NamingConvention) -> TTableSchema: + """Normalizes all table and column names in `table` schema according to current schema naming convention and returns + new instance with modified table schema. + + Naming convention like snake_case may produce name clashes with the column names. Clashing column schemas are merged + where the column that is defined later in the dictionary overrides earlier column. + + Note that resource name is not normalized. + """ + + table = copy(table) + table["name"] = naming.normalize_tables_path(table["name"]) + parent = table.get("parent") + if parent: + table["parent"] = naming.normalize_tables_path(parent) + columns = table.get("columns") + if columns: + new_columns: TTableSchemaColumns = {} + for c in columns.values(): + c = copy(c) + origin_c_name = c["name"] + new_col_name = c["name"] = naming.normalize_path(c["name"]) + # re-index columns as the name changed, if name space was reduced then + # some columns now clash with each other. 
so make sure that we merge columns that are already there + if new_col_name in new_columns: + new_columns[new_col_name] = merge_columns( + new_columns[new_col_name], c, merge_defaults=False + ) + logger.warning( + f"In schema {naming} column {origin_c_name} got normalized into" + f" {new_col_name} which clashes with other column. Both columns got merged" + " into one." + ) + else: + new_columns[new_col_name] = c + table["columns"] = new_columns + return table + + def has_table_seen_data(table: TTableSchema) -> bool: """Checks if normalizer has seen data coming to the table.""" - return "x-normalizer" in table and table["x-normalizer"].get("seen-data", None) is True # type: ignore[typeddict-item] + return "x-normalizer" in table and table["x-normalizer"].get("seen-data", None) is True + + +def remove_processing_hints(tables: TSchemaTables) -> TSchemaTables: + "Removes processing hints like x-normalizer and x-loader from schema tables. Modifies the input tables and returns it for convenience" + for table in tables.values(): + for hint in TTableProcessingHints.__annotations__.keys(): + table.pop(hint, None) # type: ignore[misc] + return tables def hint_to_column_prop(h: TColumnHint) -> TColumnProp: diff --git a/dlt/common/utils.py b/dlt/common/utils.py index 4ddde87758..8cd1572611 100644 --- a/dlt/common/utils.py +++ b/dlt/common/utils.py @@ -13,6 +13,7 @@ from typing import ( Any, + Callable, ContextManager, Dict, Iterator, @@ -503,11 +504,15 @@ def merge_row_counts(row_counts_1: RowCounts, row_counts_2: RowCounts) -> None: row_counts_1[counter_name] = row_counts_1.get(counter_name, 0) + row_counts_2[counter_name] -def extend_list_deduplicated(original_list: List[Any], extending_list: Iterable[Any]) -> List[Any]: +def extend_list_deduplicated( + original_list: List[Any], + extending_list: Iterable[Any], + normalize_f: Callable[[str], str] = str.__call__, +) -> List[Any]: """extends the first list by the second, but does not add duplicates""" - list_keys = set(original_list) + list_keys = set(normalize_f(s) for s in original_list) for item in extending_list: - if item not in list_keys: + if normalize_f(item) not in list_keys: original_list.append(item) return original_list diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 84abb4f3a8..033d5dfc84 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -7,7 +7,6 @@ from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.data_writers import TLoaderFileFormat from dlt.common.exceptions import MissingDependencyException - from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.utils import update_dict_nested from dlt.common.typing import TDataItems, TDataItem @@ -155,7 +154,9 @@ def _write_to_static_table( def _compute_table(self, resource: DltResource, items: TDataItems) -> TTableSchema: """Computes a schema for a new or dynamic table and normalizes identifiers""" - return self.schema.normalize_table_identifiers(resource.compute_table_schema(items)) + return utils.normalize_table_identifiers( + resource.compute_table_schema(items), self.schema.naming + ) def _compute_and_update_table( self, resource: DltResource, table_name: str, items: TDataItems @@ -173,10 +174,10 @@ def _compute_and_update_table( # this is a new table so allow evolve once if schema_contract["columns"] != "evolve" and self.schema.is_new_table(table_name): - computed_table["x-normalizer"] = {"evolve-columns-once": True} # type: ignore[typeddict-unknown-key] + 
computed_table["x-normalizer"] = {"evolve-columns-once": True} existing_table = self.schema._schema_tables.get(table_name, None) if existing_table: - diff_table = utils.diff_tables(existing_table, computed_table) + diff_table = utils.diff_tables(self.schema.name, existing_table, computed_table) else: diff_table = computed_table @@ -291,7 +292,7 @@ def _compute_table(self, resource: DltResource, items: TDataItems) -> TPartialTa arrow_table = copy(computed_table) arrow_table["columns"] = pyarrow.py_arrow_to_table_schema_columns(items.schema) # normalize arrow table before merging - arrow_table = self.schema.normalize_table_identifiers(arrow_table) + arrow_table = utils.normalize_table_identifiers(arrow_table, self.schema.naming) # issue warnings when overriding computed with arrow for col_name, column in arrow_table["columns"].items(): if src_column := computed_table["columns"].get(col_name): diff --git a/dlt/extract/items.py b/dlt/extract/items.py index c6e1f0a4b8..086b7e9720 100644 --- a/dlt/extract/items.py +++ b/dlt/extract/items.py @@ -165,6 +165,10 @@ class FilterItem(ItemTransform[bool]): def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: if isinstance(item, list): + # preserve empty lists + if len(item) == 0: + return item + if self._f_meta: item = [i for i in item if self._f_meta(i, meta)] else: diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 9138341381..0cf43df5a9 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -12,6 +12,7 @@ from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer from dlt.common.schema import Schema from dlt.common.schema.typing import TColumnName, TSchemaContract +from dlt.common.schema.utils import normalize_table_identifiers from dlt.common.typing import StrAny, TDataItem from dlt.common.configuration.container import Container from dlt.common.pipeline import ( @@ -304,8 +305,8 @@ def discover_schema(self, item: TDataItem = None) -> Schema: for r in self.selected_resources.values(): # names must be normalized here with contextlib.suppress(DataItemRequiredForDynamicTableHints): - partial_table = self._schema.normalize_table_identifiers( - r.compute_table_schema(item) + partial_table = normalize_table_identifiers( + r.compute_table_schema(item), self._schema.naming ) schema.update_table(partial_table) return schema diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index e6c68847d3..4db9cd35bf 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -150,7 +150,7 @@ def _normalize_chunk( continue # theres a new table or new columns in existing table # update schema and save the change - schema.update_table(partial_table) + schema.update_table(partial_table, normalize_identifiers=False) table_updates = schema_update.setdefault(table_name, []) table_updates.append(partial_table) @@ -197,6 +197,7 @@ def __call__( partial_update = self._normalize_chunk(root_table_name, items, may_have_pua(line)) schema_updates.append(partial_update) logger.debug(f"Processed {line_no} lines from file {extracted_items_file}") + # empty json files are when replace write disposition is used in order to truncate table(s) if line is None and root_table_name in self.schema.tables: # write only if table seen data before root_table = self.schema.tables[root_table_name] diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index ceb65e7788..83c311d34a 100644 --- a/dlt/normalize/normalize.py +++ 
b/dlt/normalize/normalize.py @@ -300,7 +300,7 @@ def spool_files( # update normalizer specific info for table_name in table_metrics: table = schema.tables[table_name] - x_normalizer = table.setdefault("x-normalizer", {}) # type: ignore[typeddict-item] + x_normalizer = table.setdefault("x-normalizer", {}) # drop evolve once for all tables that seen data x_normalizer.pop("evolve-columns-once", None) # mark that table have seen data only if there was data diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 185a11962a..c4a766911a 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -36,7 +36,7 @@ DestinationUndefinedEntity, DestinationIncompatibleLoaderFileFormatException, ) -from dlt.common.normalizers import explicit_normalizers, import_normalizers +from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers from dlt.common.runtime import signals, initialize_runtime from dlt.common.schema.typing import ( TColumnNames, @@ -1059,10 +1059,6 @@ def _extract_source( # extract into pipeline schema load_id = extract.extract(source, max_parallel_items, workers) - # save import with fully discovered schema - # NOTE: moved to with_schema_sync, remove this if all test pass - # self._schema_storage.save_import_schema_if_not_exists(source.schema) - # update live schema but not update the store yet self._schema_storage.update_live_schema(source.schema) diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index c20aa62d16..06462dab8b 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -172,4 +172,15 @@ load_info.raise_on_failed_jobs() ::: ## Declare external destination -You can implement [your own destination](../walkthroughs/create-new-destination.md) and pass the destination class type or instance to `dlt` pipeline. \ No newline at end of file +You can implement [your own destination](../walkthroughs/create-new-destination.md) and pass the destination class type or instance to `dlt` pipeline. + +## Control how dlt creates table, column and other identifiers + +- case folding +- case sensitivity + + +1. Redshift - always lower case, no matter which naming convention used. case insensitive +2. Athena - always lower case, no matter which naming convention used. uses different catalogue and query engines that are incompatible + + diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index 7ce1d959c9..80ad3c8239 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -36,37 +36,9 @@ the order is lost. ## Naming convention -`dlt` creates tables, child tables and column schemas from the data. The data being loaded, -typically JSON documents, contains identifiers (i.e. key names in a dictionary) with any Unicode -characters, any lengths and naming styles. On the other hand the destinations accept very strict -namespaces for their identifiers. Like Redshift that accepts case-insensitive alphanumeric -identifiers with maximum 127 characters. - -Each schema contains `naming convention` that tells `dlt` how to translate identifiers to the -namespace that the destination understands. - -The default naming convention: - -1. Converts identifiers to snake_case, small caps. Removes all ascii characters except ascii - alphanumerics and underscores. -1. Adds `_` if name starts with number. -1. Multiples of `_` are converted into single `_`. -1. 
-1. The parent-child relation is expressed as double `_` in names.
-1. It shorts the identifier if it exceed the length at the destination.
-
-> 💡 Standard behavior of `dlt` is to **use the same naming convention for all destinations** so
-> users see always the same tables and columns in their databases.
-
-> 💡 If you provide any schema elements that contain identifiers via decorators or arguments (i.e.
-> `table_name` or `columns`) all the names used will be converted via the naming convention when
-> adding to the schema. For example if you execute `dlt.run(... table_name="CamelCase")` the data
-> will be loaded into `camel_case`.
-
-> 💡 Use simple, short small caps identifiers for everything!
-
-The naming convention is configurable and users can easily create their own
-conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e.
-DuckDB).
+Each schema contains [naming convention](naming-convention.md) that tells `dlt` how to translate identifiers to the
+namespace that the destination understands. This convention can be configured, changed in code or enforced via
+destination.
 
 ## Data normalizer
 
@@ -205,7 +177,7 @@ The precision for **bigint** is mapped to available integer types ie. TINYINT, I
 ## Schema settings
 
 The `settings` section of schema file lets you define various global rules that impact how tables
-and columns are inferred from data.
+and columns are inferred from data. For example, you can assign the **primary_key** hint to all columns named `id` or force the **timestamp** data type on all columns containing `timestamp` with the use of a regex pattern.
 
 > 💡 It is the best practice to use those instead of providing the exact column schemas via `columns`
 > argument or by pasting them in `yaml`.
@@ -214,7 +186,8 @@ and columns are inferred from data.
 
 You can define a set of functions that will be used to infer the data type of the column from a
 value. The functions are run from top to bottom on the lists. Look in `detections.py` to see what is
-available.
+available. The **iso_timestamp** detector, which looks for ISO 8601 strings and converts them to **timestamp**,
+is enabled by default.
 
 ```yaml
 settings:
@@ -224,12 +197,24 @@ settings:
     - iso_date
 ```
 
+Alternatively you can add and remove detections from code:
+```python
+   source = source()
+   # remove iso time detector
+   source.schema.remove_type_detection("iso_timestamp")
+   # convert UNIX timestamp (float, within a year from NOW) into timestamp
+   source.schema.add_type_detection("timestamp")
+```
+Above we modify a schema that comes with a source to detect UNIX timestamps with the **timestamp** detector.
+
 ### Column hint rules
 
 You can define a global rules that will apply hints of a newly inferred columns. Those rules apply
-to normalized column names. You can use column names directly or with regular expressions.
+to normalized column names. You can use column names directly or with regular expressions. `dlt` matches
+the column names **after they are normalized with the naming convention**.
 
-Example from ethereum schema:
+By default, the schema adopts hint rules from the json(relational) normalizer to support correct hinting
+of columns added by the normalizer:
 
 ```yaml
 settings:
@@ -237,36 +222,59 @@ settings:
   default_hints:
     foreign_key:
       - _dlt_parent_id
     not_null:
-      - re:^_dlt_id$
+      - _dlt_id
       - _dlt_root_id
       - _dlt_parent_id
      - _dlt_list_idx
+      - _dlt_load_id
     unique:
       - _dlt_id
-    cluster:
-      - block_hash
+    root_key:
+      - _dlt_root_id
+```
+Above we require an exact column name match for a hint to apply. You can also use a regular expression (which we call `SimpleRegex`) as follows:
+```yaml
+settings:
   partition:
-      - block_timestamp
+      - re:_timestamp$
+```
+Above we add the `partition` hint to all columns ending with `_timestamp`. You can do the same thing in code:
+```python
+   source = source()
+   # this will update existing hints with the hints passed
+   source.schema.merge_hints({"partition": ["re:_timestamp$"]})
 ```
 
 ### Preferred data types
 
 You can define rules that will set the data type for newly created columns. Put the rules under
 `preferred_types` key of `settings`. On the left side there's a rule on a column name, on the right
-side is the data type.
-
-> ❗See the column hint rules for naming convention!
+side is the data type. You can use column names directly or with regular expressions.
+`dlt` matches the column names **after they are normalized with the naming convention**.
 
 Example:
 
 ```yaml
 settings:
   preferred_types:
-    timestamp: timestamp
-    re:^inserted_at$: timestamp
-    re:^created_at$: timestamp
-    re:^updated_at$: timestamp
-    re:^_dlt_list_idx$: bigint
+    re:timestamp: timestamp
+    inserted_at: timestamp
+    created_at: timestamp
+    updated_at: timestamp
+```
+
+Above we prefer the `timestamp` data type for all columns containing the **timestamp** substring and define a few exact matches, i.e. **created_at**.
+Here's the same thing in code:
+```python
+   source = source()
+   source.schema.update_preferred_types(
+      {
+         "re:timestamp": "timestamp",
+         "inserted_at": "timestamp",
+         "created_at": "timestamp",
+         "updated_at": "timestamp",
+      }
+   )
 ```
 
 ## Export and import schema files
@@ -317,7 +325,6 @@ def textual(nesting_level: int):
     schema.remove_type_detection("iso_timestamp")
     # convert UNIX timestamp (float, withing a year from NOW) into timestamp
     schema.add_type_detection("timestamp")
-    schema.compile_settings()
 
     return dlt.resource(...)
 
``` diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index 15d77a7f02..159e33da4d 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -736,7 +736,7 @@ def test_table_name_meta_normalized() -> None: def test_parse_with_primary_key() -> None: schema = create_schema_with_name("discord") - schema.merge_hints({"primary_key": ["id"]}) # type: ignore[list-item] + schema._merge_hints({"primary_key": ["id"]}) # type: ignore[list-item] schema._compile_settings() add_dlt_root_id_propagation(schema.data_item_normalizer) # type: ignore[arg-type] diff --git a/tests/common/schema/conftest.py b/tests/common/schema/conftest.py new file mode 100644 index 0000000000..53d02fc663 --- /dev/null +++ b/tests/common/schema/conftest.py @@ -0,0 +1,25 @@ +import pytest + +from dlt.common.configuration import resolve_configuration +from dlt.common.schema import Schema +from dlt.common.storages import SchemaStorageConfiguration, SchemaStorage + + +from tests.utils import autouse_test_storage, preserve_environ + + +@pytest.fixture +def schema() -> Schema: + return Schema("event") + + +@pytest.fixture +def schema_storage() -> SchemaStorage: + C = resolve_configuration( + SchemaStorageConfiguration(), + explicit_value={ + "import_schema_path": "tests/common/cases/schemas/rasa", + "external_schema_format": "json", + }, + ) + return SchemaStorage(C, makedirs=True) diff --git a/tests/common/schema/test_filtering.py b/tests/common/schema/test_filtering.py index 8cfac9309f..6634a38aa6 100644 --- a/tests/common/schema/test_filtering.py +++ b/tests/common/schema/test_filtering.py @@ -10,11 +10,6 @@ from tests.common.utils import load_json_case -@pytest.fixture -def schema() -> Schema: - return Schema("event") - - def test_row_field_filter(schema: Schema) -> None: _add_excludes(schema) bot_case: DictStrAny = load_json_case("mod_bot_case") diff --git a/tests/common/schema/test_inference.py b/tests/common/schema/test_inference.py index dce9eba149..8fd9cf38f5 100644 --- a/tests/common/schema/test_inference.py +++ b/tests/common/schema/test_inference.py @@ -15,12 +15,6 @@ TablePropertiesConflictException, ) from tests.common.utils import load_json_case -from tests.utils import preserve_environ - - -@pytest.fixture -def schema() -> Schema: - return Schema("event") def test_get_preferred_type(schema: Schema) -> None: diff --git a/tests/common/schema/test_merges.py b/tests/common/schema/test_merges.py index 0bb7818b31..de950b917e 100644 --- a/tests/common/schema/test_merges.py +++ b/tests/common/schema/test_merges.py @@ -1,10 +1,9 @@ import pytest from copy import copy, deepcopy -from dlt.common.schema import Schema, utils +from dlt.common.schema import utils from dlt.common.schema.exceptions import ( CannotCoerceColumnException, - CannotCoerceNullException, TablePropertiesConflictException, ) from dlt.common.schema.typing import TStoredSchema, TTableSchema, TColumnSchema @@ -53,21 +52,21 @@ def test_column_remove_defaults() -> None: assert utils.remove_column_defaults(copy(COL_2_HINTS)) == {"name": "test_2"} -def test_column_add_defaults() -> None: - # test complete column - full = utils.add_column_defaults(copy(COL_1_HINTS)) - assert full["unique"] is False - # remove defaults from full - clean = utils.remove_column_defaults(copy(full)) - assert clean == COL_1_HINTS_DEFAULTS - # prop is None and will be removed - del full["prop"] # type: ignore[typeddict-item] - assert 
utils.add_column_defaults(copy(clean)) == full +# def test_column_add_defaults() -> None: +# # test complete column +# full = utils.add_column_defaults(copy(COL_1_HINTS)) +# assert full["unique"] is False +# # remove defaults from full +# clean = utils.remove_column_defaults(copy(full)) +# assert clean == COL_1_HINTS_DEFAULTS +# # prop is None and will be removed +# del full["prop"] # type: ignore[typeddict-item] +# assert utils.add_column_defaults(copy(clean)) == full - # test incomplete - complete_full = utils.add_column_defaults(copy(COL_2_HINTS)) - # defaults are added - assert complete_full["unique"] is False +# # test incomplete +# complete_full = utils.add_column_defaults(copy(COL_2_HINTS)) +# # defaults are added +# assert complete_full["unique"] is False def test_remove_defaults_stored_schema() -> None: @@ -173,10 +172,10 @@ def test_diff_tables() -> None: empty = utils.new_table("table") del empty["resource"] print(empty) - partial = utils.diff_tables(empty, deepcopy(table)) + partial = utils.diff_tables("schema", empty, deepcopy(table)) # partial is simply table assert partial == table - partial = utils.diff_tables(deepcopy(table), empty) + partial = utils.diff_tables("schema", deepcopy(table), empty) # partial is empty assert partial == empty @@ -184,7 +183,7 @@ def test_diff_tables() -> None: changed = deepcopy(table) changed["description"] = "new description" changed["name"] = "new name" - partial = utils.diff_tables(deepcopy(table), changed) + partial = utils.diff_tables("schema", deepcopy(table), changed) print(partial) assert partial == {"name": "new name", "description": "new description", "columns": {}} @@ -192,7 +191,7 @@ def test_diff_tables() -> None: existing = deepcopy(table) changed["write_disposition"] = "append" changed["schema_contract"] = "freeze" - partial = utils.diff_tables(deepcopy(existing), changed) + partial = utils.diff_tables("schema", deepcopy(existing), changed) assert partial == { "name": "new name", "description": "new description", @@ -202,14 +201,14 @@ def test_diff_tables() -> None: } existing["write_disposition"] = "append" existing["schema_contract"] = "freeze" - partial = utils.diff_tables(deepcopy(existing), changed) + partial = utils.diff_tables("schema", deepcopy(existing), changed) assert partial == {"name": "new name", "description": "new description", "columns": {}} # detect changed column existing = deepcopy(table) changed = deepcopy(table) changed["columns"]["test"]["cluster"] = True - partial = utils.diff_tables(existing, changed) + partial = utils.diff_tables("schema", existing, changed) assert "test" in partial["columns"] assert "test_2" not in partial["columns"] assert existing["columns"]["test"] == table["columns"]["test"] != partial["columns"]["test"] @@ -218,7 +217,7 @@ def test_diff_tables() -> None: existing = deepcopy(table) changed = deepcopy(table) changed["columns"]["test"]["foreign_key"] = False - partial = utils.diff_tables(existing, changed) + partial = utils.diff_tables("schema", existing, changed) assert "test" in partial["columns"] # even if not present in tab_a at all @@ -226,7 +225,7 @@ def test_diff_tables() -> None: changed = deepcopy(table) changed["columns"]["test"]["foreign_key"] = False del existing["columns"]["test"]["foreign_key"] - partial = utils.diff_tables(existing, changed) + partial = utils.diff_tables("schema", existing, changed) assert "test" in partial["columns"] @@ -242,7 +241,7 @@ def test_diff_tables_conflicts() -> None: other = utils.new_table("table_2") with 
pytest.raises(TablePropertiesConflictException) as cf_ex: - utils.diff_tables(table, other) + utils.diff_tables("schema", table, other) assert cf_ex.value.table_name == "table" assert cf_ex.value.prop_name == "parent" @@ -250,7 +249,7 @@ def test_diff_tables_conflicts() -> None: changed = deepcopy(table) changed["columns"]["test"]["data_type"] = "bigint" with pytest.raises(CannotCoerceColumnException): - utils.diff_tables(table, changed) + utils.diff_tables("schema", table, changed) def test_merge_tables() -> None: @@ -269,7 +268,7 @@ def test_merge_tables() -> None: changed["new-prop-3"] = False # type: ignore[typeddict-unknown-key] # drop column so partial has it del table["columns"]["test"] - partial = utils.merge_tables(table, changed) + partial = utils.merge_tables("schema", table, changed) assert "test" in table["columns"] assert table["x-special"] == 129 # type: ignore[typeddict-item] assert table["description"] == "new description" diff --git a/tests/common/schema/test_normalize_identifiers.py b/tests/common/schema/test_normalize_identifiers.py new file mode 100644 index 0000000000..bf5dcae885 --- /dev/null +++ b/tests/common/schema/test_normalize_identifiers.py @@ -0,0 +1,338 @@ +from copy import deepcopy +import os +from typing import Callable, List, Sequence, cast +import pytest + +from dlt.common import pendulum, json +from dlt.common.configuration import resolve_configuration +from dlt.common.configuration.container import Container +from dlt.common.normalizers.naming.naming import NamingConvention +from dlt.common.schema.migrations import migrate_schema +from dlt.common.storages import SchemaStorageConfiguration +from dlt.common.destination.capabilities import DestinationCapabilitiesContext +from dlt.common.exceptions import DictValidationException +from dlt.common.normalizers.naming import snake_case, direct, sql_upper +from dlt.common.typing import DictStrAny, StrAny +from dlt.common.utils import uniq_id +from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils, TColumnHint +from dlt.common.schema.exceptions import ( + InvalidSchemaName, + ParentTableNotFoundException, + SchemaEngineNoUpgradePathException, + TableIdentifiersFrozen, +) +from dlt.common.schema.typing import ( + LOADS_TABLE_NAME, + SIMPLE_REGEX_PREFIX, + VERSION_TABLE_NAME, + TColumnName, + TSimpleRegex, + COLUMN_HINTS, +) +from dlt.common.storages import SchemaStorage +from tests.common.utils import load_json_case, load_yml_case, COMMON_TEST_CASES_PATH + + +@pytest.fixture +def schema_storage_no_import() -> SchemaStorage: + C = resolve_configuration(SchemaStorageConfiguration()) + return SchemaStorage(C, makedirs=True) + + +@pytest.fixture +def cn_schema() -> Schema: + return Schema( + "column_default", + { + "names": "tests.common.normalizers.custom_normalizers", + "json": { + "module": "tests.common.normalizers.custom_normalizers", + "config": {"not_null": ["fake_id"]}, + }, + }, + ) + + +def test_save_store_schema_custom_normalizers( + cn_schema: Schema, schema_storage: SchemaStorage +) -> None: + schema_storage.save_schema(cn_schema) + schema_copy = schema_storage.load_schema(cn_schema.name) + assert_new_schema_values_custom_normalizers(schema_copy) + + +def test_new_schema_custom_normalizers(cn_schema: Schema) -> None: + assert_new_schema_values_custom_normalizers(cn_schema) + + +def test_save_load_incomplete_column( + schema: Schema, schema_storage_no_import: SchemaStorage +) -> None: + # make sure that incomplete column is saved and restored without default hints + incomplete_col = 
utils.new_column("I", nullable=False) + incomplete_col["primary_key"] = True + incomplete_col["x-special"] = "spec" # type: ignore[typeddict-unknown-key] + table = utils.new_table("table", columns=[incomplete_col]) + schema.update_table(table, normalize_identifiers=False) + schema_storage_no_import.save_schema(schema) + schema_copy = schema_storage_no_import.load_schema("event") + assert schema_copy.get_table("table")["columns"]["I"] == { + "name": "I", + "nullable": False, + "primary_key": True, + "x-special": "spec", + } + + +def test_schema_config_normalizers(schema: Schema, schema_storage_no_import: SchemaStorage) -> None: + # save snake case schema + schema_storage_no_import.save_schema(schema) + # config direct naming convention + os.environ["SCHEMA__NAMING"] = "direct" + # new schema has direct naming convention + schema_direct_nc = Schema("direct_naming") + assert schema_direct_nc._normalizers_config["names"] == "direct" + # still after loading the config is "snake" + schema = schema_storage_no_import.load_schema(schema.name) + assert schema._normalizers_config["names"] == "snake_case" + # provide capabilities context + destination_caps = DestinationCapabilitiesContext.generic_capabilities() + destination_caps.naming_convention = "snake_case" + destination_caps.max_identifier_length = 127 + with Container().injectable_context(destination_caps): + # caps are ignored if schema is configured + schema_direct_nc = Schema("direct_naming") + assert schema_direct_nc._normalizers_config["names"] == "direct" + # but length is there + assert schema_direct_nc.naming.max_length == 127 + # also for loaded schema + schema = schema_storage_no_import.load_schema(schema.name) + assert schema._normalizers_config["names"] == "snake_case" + assert schema.naming.max_length == 127 + + +def test_normalize_table_identifiers() -> None: + # load with snake case + schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema") + schema = Schema.from_dict(schema_dict) # type: ignore[arg-type] + issues_table = schema.tables["issues"] + issues_table_str = json.dumps(issues_table) + # normalize table to upper + issues_table_norm = utils.normalize_table_identifiers( + issues_table, sql_upper.NamingConvention() + ) + # nothing got changes in issues table + assert issues_table_str == json.dumps(issues_table) + # check normalization + assert issues_table_norm["name"] == "ISSUES" + assert "REACTIONS___1" in issues_table_norm["columns"] + # subsequent normalization does not change dict + assert issues_table_norm == utils.normalize_table_identifiers( + issues_table_norm, sql_upper.NamingConvention() + ) + + +def test_normalize_table_identifiers_idempotent() -> None: + schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema") + schema = Schema.from_dict(schema_dict) # type: ignore[arg-type] + # assert column generated from "reactions/+1" and "-1", it is a valid identifier even with three underscores + assert "reactions___1" in schema.tables["issues"]["columns"] + issues_table = schema.tables["issues"] + # this schema is already normalized so normalization is idempotent + assert schema.tables["issues"] == utils.normalize_table_identifiers(issues_table, schema.naming) + assert schema.tables["issues"] == utils.normalize_table_identifiers( + utils.normalize_table_identifiers(issues_table, schema.naming), schema.naming + ) + + +def test_normalize_table_identifiers_merge_columns() -> None: + # create conflicting columns + table_create = [ + {"name": "case", "data_type": "bigint", "nullable": 
False, "x-description": "desc"}, + {"name": "Case", "data_type": "double", "nullable": True, "primary_key": True}, + ] + # schema normalizing to snake case will conflict on case and Case + table = utils.new_table("blend", columns=table_create) # type: ignore[arg-type] + table_str = json.dumps(table) + norm_table = utils.normalize_table_identifiers(table, Schema("norm").naming) + # nothing got changed in original table + assert table_str == json.dumps(table) + # only one column + assert len(norm_table["columns"]) == 1 + assert norm_table["columns"]["case"] == { + "nullable": False, # remove default, preserve non default + "primary_key": True, + "name": "case", + "data_type": "double", + "x-description": "desc", + } + + +def test_update_normalizers() -> None: + schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema") + schema = Schema.from_dict(schema_dict) # type: ignore[arg-type] + # drop seen data + del schema.tables["issues"]["x-normalizer"] + del schema.tables["issues__labels"]["x-normalizer"] + del schema.tables["issues__assignees"]["x-normalizer"] + # save default hints in original form + default_hints = schema._settings["default_hints"] + + os.environ["SCHEMA__NAMING"] = "sql_upper" + schema.update_normalizers() + assert isinstance(schema.naming, sql_upper.NamingConvention) + # print(schema.to_pretty_yaml()) + assert_schema_identifiers_case(schema, str.upper) + + # resource must be old name + assert schema.tables["ISSUES"]["resource"] == "issues" + + # make sure normalizer config is replaced + assert schema._normalizers_config["names"] == "sql_upper" + assert "allow_identifier_change_on_table_with_data" not in schema._normalizers_config + + # regexes are uppercased + new_default_hints = schema._settings["default_hints"] + for hint, regexes in default_hints.items(): + # same number of hints + assert len(regexes) == len(new_default_hints[hint]) + # but all upper cased + assert set(n.upper() for n in regexes) == set(new_default_hints[hint]) + + +def test_normalize_default_hints(schema_storage_no_import: SchemaStorage) -> None: + # use destination caps to force naming convention + from dlt.common.destination import DestinationCapabilitiesContext + from dlt.common.configuration.container import Container + + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + orig_schema = Schema.from_dict(eth_V9) + # save schema + schema_storage_no_import.save_schema(orig_schema) + + with Container().injectable_context( + DestinationCapabilitiesContext.generic_capabilities( + naming_convention=sql_upper.NamingConvention() + ) + ) as caps: + assert isinstance(caps.naming_convention, sql_upper.NamingConvention) + # creating a schema from dict keeps original normalizers + schema = Schema.from_dict(eth_V9) + assert_schema_identifiers_case(schema, str.lower) + assert schema._normalizers_config["names"].endswith("snake_case") # type: ignore + + # loading from storage keeps storage normalizers + storage_schema = schema_storage_no_import.load_schema("ethereum") + assert_schema_identifiers_case(storage_schema, str.lower) + assert storage_schema._normalizers_config["names"].endswith("snake_case") # type: ignore + + # new schema instance is created using caps/config + new_schema = Schema("new") + assert_schema_identifiers_case(new_schema, str.upper) + assert isinstance(new_schema._normalizers_config["names"], NamingConvention) + + # attempt to update normalizers blocked by tables with data + with pytest.raises(TableIdentifiersFrozen): + schema.update_normalizers() + # also cloning with 
update normalizers + with pytest.raises(TableIdentifiersFrozen): + schema.clone(update_normalizers=True) + + # remove processing hints and normalize + norm_cloned = schema.clone(update_normalizers=True, remove_processing_hints=True) + assert_schema_identifiers_case(norm_cloned, str.upper) + assert isinstance(norm_cloned._normalizers_config["names"], NamingConvention) + + norm_schema = Schema.from_dict(deepcopy(eth_V9), remove_processing_hints=True) + norm_schema.update_normalizers() + assert_schema_identifiers_case(norm_schema, str.upper) + assert isinstance(norm_schema._normalizers_config["names"], NamingConvention) + + # both ways of obtaining schemas (cloning, cleaning dict) must generate identical schemas + assert norm_cloned.to_pretty_json() == norm_schema.to_pretty_json() + + # save to storage + schema_storage_no_import.save_schema(norm_cloned) + + # load schema out of caps + storage_schema = schema_storage_no_import.load_schema("ethereum") + assert_schema_identifiers_case(storage_schema, str.upper) + # the instance got converted into + assert storage_schema._normalizers_config["names"].endswith("sql_upper.NamingConvention") # type: ignore + assert storage_schema.stored_version_hash == storage_schema.version_hash + + +def test_raise_on_change_identifier_table_with_data() -> None: + schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema") + schema = Schema.from_dict(schema_dict) # type: ignore[arg-type] + # mark issues table to seen data and change naming to sql upper + issues_table = schema.tables["issues"] + issues_table["x-normalizer"] = {"seen-data": True} + os.environ["SCHEMA__NAMING"] = "sql_upper" + with pytest.raises(TableIdentifiersFrozen) as fr_ex: + schema.update_normalizers() + assert fr_ex.value.table_name == "issues" + assert isinstance(fr_ex.value.from_naming, snake_case.NamingConvention) + assert isinstance(fr_ex.value.to_naming, sql_upper.NamingConvention) + # try again, get exception (schema was not partially modified) + with pytest.raises(TableIdentifiersFrozen) as fr_ex: + schema.update_normalizers() + + # use special naming convention that only changes column names ending with x to _ + issues_table["columns"]["columnx"] = {"name": "columnx", "data_type": "bigint"} + assert schema.tables["issues"] is issues_table + os.environ["SCHEMA__NAMING"] = "tests.common.normalizers.snake_no_x" + with pytest.raises(TableIdentifiersFrozen) as fr_ex: + schema.update_normalizers() + assert fr_ex.value.table_name == "issues" + # allow to change tables with data + os.environ["SCHEMA__ALLOW_IDENTIFIER_CHANGE_ON_TABLE_WITH_DATA"] = "True" + schema.update_normalizers() + assert schema._normalizers_config["allow_identifier_change_on_table_with_data"] is True + + +def assert_schema_identifiers_case(schema: Schema, casing: Callable[[str], str]) -> None: + for table_name, table in schema.tables.items(): + assert table_name == casing(table_name) == table["name"] + if "parent" in table: + assert table["parent"] == casing(table["parent"]) + for col_name, column in table["columns"].items(): + assert col_name == casing(col_name) == column["name"] + + # make sure table prefixes are set + assert schema._dlt_tables_prefix == casing("_dlt") + assert schema.loads_table_name == casing("_dlt_loads") + assert schema.version_table_name == casing("_dlt_version") + assert schema.state_table_name == casing("_dlt_pipeline_state") + + def _case_regex(regex: str) -> str: + if regex.startswith(SIMPLE_REGEX_PREFIX): + return SIMPLE_REGEX_PREFIX + casing(regex[3:]) + else: + return 
casing(regex) + + # regexes are uppercased + new_default_hints = schema._settings["default_hints"] + for hint, regexes in new_default_hints.items(): + # but all upper cased + assert set(_case_regex(n) for n in regexes) == set(new_default_hints[hint]) + + +def assert_new_schema_values_custom_normalizers(schema: Schema) -> None: + # check normalizers config + assert schema._normalizers_config["names"] == "tests.common.normalizers.custom_normalizers" + assert ( + schema._normalizers_config["json"]["module"] + == "tests.common.normalizers.custom_normalizers" + ) + # check if schema was extended by json normalizer + assert ["fake_id"] == schema.settings["default_hints"]["not_null"] + # call normalizers + assert schema.naming.normalize_identifier("a") == "column_a" + assert schema.naming.normalize_path("a__b") == "column_a__column_b" + assert schema.naming.normalize_identifier("1A_b") == "column_1a_b" + # assumes elements are normalized + assert schema.naming.make_path("A", "B", "!C") == "A__B__!C" + assert schema.naming.break_path("A__B__!C") == ["A", "B", "!C"] + row = list(schema.normalize_data_item({"bool": True}, "load_id", "a_table")) + assert row[0] == (("a_table", None), {"bool": True}) diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py index 53fa8f9ae0..aec7bb1d0f 100644 --- a/tests/common/schema/test_schema.py +++ b/tests/common/schema/test_schema.py @@ -1,19 +1,15 @@ -from copy import deepcopy import os -from typing import List, Sequence, cast +from typing import Dict, List, Sequence import pytest from dlt.common import pendulum -from dlt.common.configuration import resolve_configuration -from dlt.common.configuration.container import Container +from dlt.common.data_types.typing import TDataType from dlt.common.schema.migrations import migrate_schema -from dlt.common.storages import SchemaStorageConfiguration -from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.exceptions import DictValidationException -from dlt.common.normalizers.naming import snake_case, direct +from dlt.common.normalizers.naming import snake_case from dlt.common.typing import DictStrAny, StrAny from dlt.common.utils import uniq_id -from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils, TColumnHint +from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils from dlt.common.schema.exceptions import ( InvalidSchemaName, ParentTableNotFoundException, @@ -28,50 +24,12 @@ ) from dlt.common.storages import SchemaStorage -from tests.utils import autouse_test_storage, preserve_environ from tests.common.utils import load_json_case, load_yml_case, COMMON_TEST_CASES_PATH SCHEMA_NAME = "event" EXPECTED_FILE_NAME = f"{SCHEMA_NAME}.schema.json" -@pytest.fixture -def schema_storage() -> SchemaStorage: - C = resolve_configuration( - SchemaStorageConfiguration(), - explicit_value={ - "import_schema_path": "tests/common/cases/schemas/rasa", - "external_schema_format": "json", - }, - ) - return SchemaStorage(C, makedirs=True) - - -@pytest.fixture -def schema_storage_no_import() -> SchemaStorage: - C = resolve_configuration(SchemaStorageConfiguration()) - return SchemaStorage(C, makedirs=True) - - -@pytest.fixture -def schema() -> Schema: - return Schema("event") - - -@pytest.fixture -def cn_schema() -> Schema: - return Schema( - "column_default", - { - "names": "tests.common.normalizers.custom_normalizers", - "json": { - "module": "tests.common.normalizers.custom_normalizers", - "config": {"not_null": ["fake_id"]}, - }, - }, 
- ) - - def test_normalize_schema_name(schema: Schema) -> None: assert schema.naming.normalize_table_identifier("BAN_ANA") == "ban_ana" assert schema.naming.normalize_table_identifier("event-.!:value") == "event_value" @@ -91,37 +49,6 @@ def test_new_schema(schema: Schema) -> None: assert_new_schema_values(schema) -def test_new_schema_custom_normalizers(cn_schema: Schema) -> None: - assert_new_schema_values_custom_normalizers(cn_schema) - - -def test_schema_config_normalizers(schema: Schema, schema_storage_no_import: SchemaStorage) -> None: - # save snake case schema - schema_storage_no_import.save_schema(schema) - # config direct naming convention - os.environ["SCHEMA__NAMING"] = "direct" - # new schema has direct naming convention - schema_direct_nc = Schema("direct_naming") - assert schema_direct_nc._normalizers_config["names"] == "direct" - # still after loading the config is "snake" - schema = schema_storage_no_import.load_schema(schema.name) - assert schema._normalizers_config["names"] == "snake_case" - # provide capabilities context - destination_caps = DestinationCapabilitiesContext.generic_capabilities() - destination_caps.naming_convention = "snake_case" - destination_caps.max_identifier_length = 127 - with Container().injectable_context(destination_caps): - # caps are ignored if schema is configured - schema_direct_nc = Schema("direct_naming") - assert schema_direct_nc._normalizers_config["names"] == "direct" - # but length is there - assert schema_direct_nc.naming.max_length == 127 - # also for loaded schema - schema = schema_storage_no_import.load_schema(schema.name) - assert schema._normalizers_config["names"] == "snake_case" - assert schema.naming.max_length == 127 - - def test_simple_regex_validator() -> None: # can validate only simple regexes assert utils.simple_regex_validator(".", "k", "v", str) is False @@ -309,33 +236,6 @@ def test_save_store_schema(schema: Schema, schema_storage: SchemaStorage) -> Non assert_new_schema_values(schema_copy) -def test_save_store_schema_custom_normalizers( - cn_schema: Schema, schema_storage: SchemaStorage -) -> None: - schema_storage.save_schema(cn_schema) - schema_copy = schema_storage.load_schema(cn_schema.name) - assert_new_schema_values_custom_normalizers(schema_copy) - - -def test_save_load_incomplete_column( - schema: Schema, schema_storage_no_import: SchemaStorage -) -> None: - # make sure that incomplete column is saved and restored without default hints - incomplete_col = utils.new_column("I", nullable=False) - incomplete_col["primary_key"] = True - incomplete_col["x-special"] = "spec" # type: ignore[typeddict-unknown-key] - table = utils.new_table("table", columns=[incomplete_col]) - schema.update_table(table, normalize_identifiers=False) - schema_storage_no_import.save_schema(schema) - schema_copy = schema_storage_no_import.load_schema("event") - assert schema_copy.get_table("table")["columns"]["I"] == { - "name": "I", - "nullable": False, - "primary_key": True, - "x-special": "spec", - } - - def test_upgrade_engine_v1_schema() -> None: schema_dict: DictStrAny = load_json_case("schemas/ev1/event.schema") # ensure engine v1 @@ -394,7 +294,7 @@ def test_unknown_engine_upgrade() -> None: def test_preserve_column_order(schema: Schema, schema_storage: SchemaStorage) -> None: # python dicts are ordered from v3.6, add 50 column with random names update: List[TColumnSchema] = [ - schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50) + schema._infer_column("t" + uniq_id(), pendulum.now().timestamp()) for _ in 
range(50) ] schema.update_table(utils.new_table("event_test_order", columns=update)) @@ -411,7 +311,7 @@ def verify_items(table, update) -> None: verify_items(table, update) # add more columns update2: List[TColumnSchema] = [ - schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50) + schema._infer_column("t" + uniq_id(), pendulum.now().timestamp()) for _ in range(50) ] loaded_schema.update_table(utils.new_table("event_test_order", columns=update2)) table = loaded_schema.get_table_columns("event_test_order") @@ -563,6 +463,79 @@ def test_merge_hints(schema: Schema) -> None: for k in expected_hints: assert set(expected_hints[k]) == set(schema._settings["default_hints"][k]) # type: ignore[index] + # make sure that re:^_dlt_id$ and _dlt_id are equivalent when merging so we can use both forms + alt_form_hints = { + "not_null": ["re:^_dlt_id$"], + "foreign_key": ["_dlt_parent_id"], + } + schema.merge_hints(alt_form_hints) # type: ignore[arg-type] + # we keep the older forms so nothing changed + assert len(expected_hints) == len(schema._settings["default_hints"]) + for k in expected_hints: + assert set(expected_hints[k]) == set(schema._settings["default_hints"][k]) # type: ignore[index] + + # check normalize some regex forms + upper_hints = { + "not_null": [ + "_DLT_ID", + ], + "foreign_key": ["re:^_DLT_PARENT_ID$"], + } + schema.merge_hints(upper_hints) # type: ignore[arg-type] + # all upper form hints can be automatically converted to lower form + assert len(expected_hints) == len(schema._settings["default_hints"]) + for k in expected_hints: + assert set(expected_hints[k]) == set(schema._settings["default_hints"][k]) # type: ignore[index] + + # this form cannot be converted + upper_hints = { + "not_null": [ + "re:TU[b-b]a", + ], + } + schema.merge_hints(upper_hints) # type: ignore[arg-type] + assert "re:TU[b-b]a" in schema.settings["default_hints"]["not_null"] + + +def test_update_preferred_types(schema: Schema) -> None: + # no preferred types in the schema + assert "preferred_types" not in schema.settings + + expected: Dict[TSimpleRegex, TDataType] = { + TSimpleRegex("_dlt_id"): "bigint", + TSimpleRegex("re:^timestamp$"): "timestamp", + } + schema.update_preferred_types(expected) + assert schema.settings["preferred_types"] == expected + # no changes + schema.update_preferred_types(expected) + assert schema.settings["preferred_types"] == expected + + # add and replace, canonical form used to update / replace + updated: Dict[TSimpleRegex, TDataType] = { + TSimpleRegex("_dlt_id"): "decimal", + TSimpleRegex("timestamp"): "date", + TSimpleRegex("re:TU[b-c]a"): "text", + } + schema.update_preferred_types(updated) + assert schema.settings["preferred_types"] == { + "_dlt_id": "decimal", + "re:^timestamp$": "date", + "re:TU[b-c]a": "text", + } + + # will normalize some form of regex + updated = { + TSimpleRegex("_DLT_id"): "text", + TSimpleRegex("re:^TIMESTAMP$"): "timestamp", + } + schema.update_preferred_types(updated) + assert schema.settings["preferred_types"] == { + "_dlt_id": "text", + "re:^timestamp$": "timestamp", + "re:TU[b-c]a": "text", + } + def test_default_table_resource() -> None: """Parent tables without `resource` set default to table name""" @@ -674,59 +647,6 @@ def test_compare_columns() -> None: ) -def test_normalize_table_identifiers() -> None: - schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema") - schema = Schema.from_dict(schema_dict) # type: ignore[arg-type] - # assert column generated from "reactions/+1" and "-1", it is a valid 
identifier even with three underscores - assert "reactions___1" in schema.tables["issues"]["columns"] - issues_table = deepcopy(schema.tables["issues"]) - # this schema is already normalized so normalization is idempotent - assert schema.tables["issues"] == schema.normalize_table_identifiers(issues_table) - assert schema.tables["issues"] == schema.normalize_table_identifiers( - schema.normalize_table_identifiers(issues_table) - ) - - -def test_normalize_table_identifiers_merge_columns() -> None: - # create conflicting columns - table_create = [ - {"name": "case", "data_type": "bigint", "nullable": False, "x-description": "desc"}, - {"name": "Case", "data_type": "double", "nullable": True, "primary_key": True}, - ] - # schema normalizing to snake case will conflict on case and Case - table = utils.new_table("blend", columns=table_create) # type: ignore[arg-type] - norm_table = Schema("norm").normalize_table_identifiers(table) - # only one column - assert len(norm_table["columns"]) == 1 - assert norm_table["columns"]["case"] == { - "nullable": False, # remove default, preserve non default - "primary_key": True, - "name": "case", - "data_type": "double", - "x-description": "desc", - } - - -def assert_new_schema_values_custom_normalizers(schema: Schema) -> None: - # check normalizers config - assert schema._normalizers_config["names"] == "tests.common.normalizers.custom_normalizers" - assert ( - schema._normalizers_config["json"]["module"] - == "tests.common.normalizers.custom_normalizers" - ) - # check if schema was extended by json normalizer - assert ["fake_id"] == schema.settings["default_hints"]["not_null"] - # call normalizers - assert schema.naming.normalize_identifier("a") == "column_a" - assert schema.naming.normalize_path("a__b") == "column_a__column_b" - assert schema.naming.normalize_identifier("1A_b") == "column_1a_b" - # assumes elements are normalized - assert schema.naming.make_path("A", "B", "!C") == "A__B__!C" - assert schema.naming.break_path("A__B__!C") == ["A", "B", "!C"] - row = list(schema.normalize_data_item({"bool": True}, "load_id", "a_table")) - assert row[0] == (("a_table", None), {"bool": True}) - - def assert_new_schema_values(schema: Schema) -> None: assert schema.version == 1 assert schema.stored_version == 1 @@ -829,3 +749,36 @@ def test_group_tables_by_resource(schema: Schema) -> None: {"columns": {}, "name": "mc_products__sub", "parent": "mc_products"}, ] } + + +def test_remove_processing_hints() -> None: + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + # here tables contain processing hints + schema = Schema.from_dict(eth_V9) + assert "x-normalizer" in schema.tables["blocks"] + + # clone with hints removal + cloned = schema.clone(remove_processing_hints=True) + assert "x-normalizer" not in cloned.tables["blocks"] + # clone does not touch original schema + assert "x-normalizer" in schema.tables["blocks"] + + # to string + to_yaml = schema.to_pretty_yaml() + assert "x-normalizer" in to_yaml + to_yaml = schema.to_pretty_yaml(remove_processing_hints=True) + assert "x-normalizer" not in to_yaml + to_json = schema.to_pretty_json() + assert "x-normalizer" in to_json + to_json = schema.to_pretty_json(remove_processing_hints=True) + assert "x-normalizer" not in to_json + + # load without hints + no_hints = schema.from_dict(eth_V9, remove_processing_hints=True) + assert no_hints.stored_version_hash == cloned.stored_version_hash + + +def test_get_new_table_columns() -> None: + pytest.fail(reason="must implement!") + pass + # get_new_table_columns() diff 
--git a/tests/common/schema/test_versioning.py b/tests/common/schema/test_versioning.py index dde05001e8..32f4056281 100644 --- a/tests/common/schema/test_versioning.py +++ b/tests/common/schema/test_versioning.py @@ -1,6 +1,5 @@ import pytest import yaml -from copy import deepcopy from dlt.common import json from dlt.common.schema import utils From c53808f61c26ea7adc5d31f7317edb922e882d67 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:35:58 +0100 Subject: [PATCH 031/105] accepts naming convention instances when resolving configs --- dlt/common/normalizers/__init__.py | 8 +-- dlt/common/normalizers/configuration.py | 7 +- dlt/common/normalizers/typing.py | 8 ++- dlt/common/normalizers/utils.py | 96 +++++++++++++++++-------- 4 files changed, 79 insertions(+), 40 deletions(-) diff --git a/dlt/common/normalizers/__init__.py b/dlt/common/normalizers/__init__.py index 2ff41d4c12..af6add6a19 100644 --- a/dlt/common/normalizers/__init__.py +++ b/dlt/common/normalizers/__init__.py @@ -1,11 +1,9 @@ -from dlt.common.normalizers.configuration import NormalizersConfiguration from dlt.common.normalizers.typing import TJSONNormalizer, TNormalizersConfig -from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers +from dlt.common.normalizers.naming import NamingConvention + __all__ = [ - "NormalizersConfiguration", + "NamingConvention", "TJSONNormalizer", "TNormalizersConfig", - "explicit_normalizers", - "import_normalizers", ] diff --git a/dlt/common/normalizers/configuration.py b/dlt/common/normalizers/configuration.py index 6957417f9d..d4a6b6113d 100644 --- a/dlt/common/normalizers/configuration.py +++ b/dlt/common/normalizers/configuration.py @@ -1,9 +1,9 @@ -import dataclasses -from typing import Optional, TYPE_CHECKING +from typing import Optional, TYPE_CHECKING, Union from dlt.common.configuration import configspec from dlt.common.configuration.specs import BaseConfiguration from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.normalizers.naming import NamingConvention from dlt.common.normalizers.typing import TJSONNormalizer from dlt.common.typing import StrAny @@ -13,8 +13,9 @@ class NormalizersConfiguration(BaseConfiguration): # always in section __section__: str = "schema" - naming: Optional[str] = None + naming: Optional[Union[str, NamingConvention]] = None json_normalizer: Optional[StrAny] = None + allow_identifier_change_on_table_with_data: Optional[bool] = None destination_capabilities: Optional[DestinationCapabilitiesContext] = None # injectable def on_resolved(self) -> None: diff --git a/dlt/common/normalizers/typing.py b/dlt/common/normalizers/typing.py index 599426259f..3903858091 100644 --- a/dlt/common/normalizers/typing.py +++ b/dlt/common/normalizers/typing.py @@ -1,14 +1,16 @@ -from typing import List, Optional, TypedDict +from typing import List, Optional, TypedDict, Union from dlt.common.typing import StrAny +from dlt.common.normalizers.naming import NamingConvention class TJSONNormalizer(TypedDict, total=False): module: str - config: Optional[StrAny] # config is a free form and is consumed by `module` + config: Optional[StrAny] # config is a free form and is validated by `module` class TNormalizersConfig(TypedDict, total=False): - names: str + names: Union[str, NamingConvention] + allow_identifier_change_on_table_with_data: Optional[bool] detections: Optional[List[str]] json: TJSONNormalizer diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index dde78edede..0fc223ee7b 
100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -1,12 +1,13 @@ +import inspect from importlib import import_module -from typing import Any, Type, Tuple, cast, List +from typing import Any, Type, Tuple, Union, cast, List import dlt from dlt.common.configuration.inject import with_config from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.normalizers.configuration import NormalizersConfiguration from dlt.common.normalizers.json import SupportsDataItemNormalizer, DataItemNormalizer -from dlt.common.normalizers.naming import NamingConvention, SupportsNamingConvention +from dlt.common.normalizers.naming import NamingConvention from dlt.common.normalizers.naming.exceptions import UnknownNamingModule, InvalidNamingModule from dlt.common.normalizers.typing import TJSONNormalizer, TNormalizersConfig from dlt.common.utils import uniq_id_base64, many_uniq_ids_base64 @@ -17,10 +18,17 @@ @with_config(spec=NormalizersConfiguration) def explicit_normalizers( - naming: str = dlt.config.value, json_normalizer: TJSONNormalizer = dlt.config.value + naming: Union[str, NamingConvention] = dlt.config.value, + json_normalizer: TJSONNormalizer = dlt.config.value, + allow_identifier_change_on_table_with_data: bool = None, ) -> TNormalizersConfig: """Gets explicitly configured normalizers - via config or destination caps. May return None as naming or normalizer""" - return {"names": naming, "json": json_normalizer} + norm_conf: TNormalizersConfig = {"names": naming, "json": json_normalizer} + if allow_identifier_change_on_table_with_data is not None: + norm_conf["allow_identifier_change_on_table_with_data"] = ( + allow_identifier_change_on_table_with_data + ) + return norm_conf @with_config @@ -37,39 +45,69 @@ def import_normalizers( normalizers_config["json"] = item_normalizer = normalizers_config["json"] or { "module": "dlt.common.normalizers.json.relational" } - try: - if "." in names: - # TODO: bump schema engine version and migrate schema. 
also change the name in TNormalizersConfig from names to naming - if names == "dlt.common.normalizers.names.snake_case": - names = DEFAULT_NAMING_MODULE - # this is full module name - naming_module = cast(SupportsNamingConvention, import_module(names)) - else: - # from known location - naming_module = cast( - SupportsNamingConvention, import_module(f"dlt.common.normalizers.naming.{names}") - ) - except ImportError: - raise UnknownNamingModule(names) - if not hasattr(naming_module, "NamingConvention"): - raise InvalidNamingModule(names) - # get max identifier length - if destination_capabilities: - max_length = min( - destination_capabilities.max_identifier_length, - destination_capabilities.max_column_identifier_length, - ) - else: - max_length = None json_module = cast(SupportsDataItemNormalizer, import_module(item_normalizer["module"])) return ( normalizers_config, - naming_module.NamingConvention(max_length), + naming_from_reference(names, destination_capabilities), json_module.DataItemNormalizer, ) +def naming_from_reference( + names: Union[str, NamingConvention], + destination_capabilities: DestinationCapabilitiesContext = None, +) -> NamingConvention: + """Resolves naming convention from reference in `names` and applies max length from `destination_capabilities` + + Reference may be: (1) actual instance of NamingConvention (2) shorthand name pointing to `dlt.common.normalizers.naming` namespace + (3) a type name which is a module containing `NamingConvention` attribute (4) a type of class deriving from NamingConvention + """ + + def _import_naming(module: str, cls: str) -> Type[NamingConvention]: + if "." in module or cls != "NamingConvention": + # TODO: bump schema engine version and migrate schema. also change the name in TNormalizersConfig from names to naming + if module == "dlt.common.normalizers.names.snake_case": + module = DEFAULT_NAMING_MODULE + # this is full module name + naming_module = import_module(module) + else: + # from known location + naming_module = import_module(f"dlt.common.normalizers.naming.{module}") + class_ = getattr(naming_module, cls, None) + if class_ is None: + raise UnknownNamingModule(module + "." 
+ cls) + if inspect.isclass(class_) and issubclass(class_, NamingConvention): + return class_ + raise InvalidNamingModule(module, cls) + + if not isinstance(names, NamingConvention): + try: + class_ = _import_naming(names, "NamingConvention") + except ImportError: + parts = names.rsplit(".", 1) + # we have no more options to try + if len(parts) <= 1: + raise UnknownNamingModule(names) + try: + class_ = _import_naming(*parts) + except UnknownNamingModule: + raise + except ImportError: + raise UnknownNamingModule(names) + + # get max identifier length + if destination_capabilities: + max_length = min( + destination_capabilities.max_identifier_length, + destination_capabilities.max_column_identifier_length, + ) + else: + max_length = None + names = class_(max_length) + return names + + def generate_dlt_ids(n_ids: int) -> List[str]: return many_uniq_ids_base64(n_ids, DLT_ID_LENGTH_BYTES) From b97ae530989ca84c76071b996c404d81a663eddc Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:36:38 +0100 Subject: [PATCH 032/105] fixes the cloning of schema in decorator, removes processing hints --- dlt/extract/decorators.py | 31 ++++++++++++++++++----------- tests/extract/test_decorators.py | 34 ++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 6e916ff6e1..3607269b08 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -214,16 +214,16 @@ def decorator( source_sections = (known_sections.SOURCES, source_section, effective_name) conf_f = with_config(f, spec=spec, sections=source_sections) - def _eval_rv(_rv: Any) -> TDltSourceImpl: + def _eval_rv(_rv: Any, schema_copy: Schema) -> TDltSourceImpl: """Evaluates return value from the source function or coroutine""" if _rv is None: - raise SourceDataIsNone(schema.name) + raise SourceDataIsNone(schema_copy.name) # if generator, consume it immediately if inspect.isgenerator(_rv): _rv = list(_rv) # convert to source - s = _impl_cls.from_data(schema.clone(update_normalizers=True), source_section, _rv) + s = _impl_cls.from_data(schema_copy, source_section, _rv) # apply hints if max_table_nesting is not None: s.max_table_nesting = max_table_nesting @@ -235,7 +235,10 @@ def _eval_rv(_rv: Any) -> TDltSourceImpl: @wraps(conf_f) def _wrap(*args: Any, **kwargs: Any) -> TDltSourceImpl: """Wrap a regular function, injection context must be a part of the wrap""" - with Container().injectable_context(SourceSchemaInjectableContext(schema)): + # clone the schema passed to decorator, update normalizers, remove processing hints + # NOTE: source may be called several times in many different settings + schema_copy = schema.clone(update_normalizers=True, remove_processing_hints=True) + with Container().injectable_context(SourceSchemaInjectableContext(schema_copy)): # configurations will be accessed in this section in the source proxy = Container()[PipelineContext] pipeline_name = None if not proxy.is_active() else proxy.pipeline().pipeline_name @@ -243,18 +246,21 @@ def _wrap(*args: Any, **kwargs: Any) -> TDltSourceImpl: ConfigSectionContext( pipeline_name=pipeline_name, sections=source_sections, - source_state_key=schema.name, + source_state_key=schema_copy.name, ) ): rv = conf_f(*args, **kwargs) - return _eval_rv(rv) + return _eval_rv(rv, schema_copy) @wraps(conf_f) async def _wrap_coro(*args: Any, **kwargs: Any) -> TDltSourceImpl: """In case of co-routine we must wrap the whole injection context in awaitable, there's no easy way to 
avoid some code duplication """ - with Container().injectable_context(SourceSchemaInjectableContext(schema)): + # clone the schema passed to decorator, update normalizers, remove processing hints + # NOTE: source may be called several times in many different settings + schema_copy = schema.clone(update_normalizers=True, remove_processing_hints=True) + with Container().injectable_context(SourceSchemaInjectableContext(schema_copy)): # configurations will be accessed in this section in the source proxy = Container()[PipelineContext] pipeline_name = None if not proxy.is_active() else proxy.pipeline().pipeline_name @@ -262,11 +268,11 @@ async def _wrap_coro(*args: Any, **kwargs: Any) -> TDltSourceImpl: ConfigSectionContext( pipeline_name=pipeline_name, sections=source_sections, - source_state_key=schema.name, + source_state_key=schema_copy.name, ) ): rv = await conf_f(*args, **kwargs) - return _eval_rv(rv) + return _eval_rv(rv, schema_copy) # get spec for wrapped function SPEC = get_fun_spec(conf_f) @@ -732,8 +738,11 @@ def _maybe_load_schema_for_callable(f: AnyFun, name: str) -> Optional[Schema]: try: file = inspect.getsourcefile(f) if file: - return SchemaStorage.load_schema_file(os.path.dirname(file), name) - + schema = SchemaStorage.load_schema_file( + os.path.dirname(file), name, remove_processing_hints=True + ) + schema.update_normalizers() + return schema except SchemaNotFoundError: pass return None diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index 03f87db923..21d620c8ed 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -42,7 +42,7 @@ ) from dlt.extract.items import TableNameMeta -from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V9 +from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V9, load_yml_case def test_none_returning_source() -> None: @@ -87,7 +87,10 @@ def test_load_schema_for_callable() -> None: schema = s.schema assert schema.name == "ethereum" == s.name # the schema in the associated file has this hash - assert schema.stored_version_hash == IMPORTED_VERSION_HASH_ETH_V9 + eth_v9 = load_yml_case("schemas/eth/ethereum_schema_v9") + # source removes processing hints so we do + reference_schema = Schema.from_dict(eth_v9, remove_processing_hints=True) + assert schema.stored_version_hash == reference_schema.stored_version_hash def test_unbound_parametrized_transformer() -> None: @@ -545,6 +548,21 @@ def created_global(): _assert_source_schema(created_global(), "global") +def test_source_schema_removes_processing_hints() -> None: + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + assert "x-normalizer" in eth_V9["tables"]["blocks"] + + @dlt.source(schema=Schema.from_dict(eth_V9)) + def created_explicit(): + schema = dlt.current.source_schema() + assert schema.name == "ethereum" + assert "x-normalizer" not in schema.tables["blocks"] + return dlt.resource([1, 2, 3], name="res") + + source = created_explicit() + assert "x-normalizer" not in source.schema.tables["blocks"] + + def test_source_state_context() -> None: @dlt.resource(selected=False) def main(): @@ -781,6 +799,18 @@ def test_standalone_transformer() -> None: ] +def test_transformer_required_args() -> None: + @dlt.transformer + def path_params(id_, workspace_id, load_id, base: bool = False): + yield {"id": id_, "workspace_id": workspace_id, "load_id": load_id} + + data = list([1, 2, 3] | path_params(121, 343)) + assert len(data) == 3 + assert data[0] == {"id": 1, "workspace_id": 121, "load_id": 343} + + # @dlt + + 
@dlt.transformer(standalone=True, name=lambda args: args["res_name"]) def standalone_tx_with_name(item: TDataItem, res_name: str, init: int = dlt.config.value): return res_name * item * init From 0132c2fe15b3916c82c2940fdbe8dfc58fb122a2 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:38:13 +0100 Subject: [PATCH 033/105] removes processing hints when saving imported schema --- dlt/common/storages/live_schema_storage.py | 11 +++---- dlt/common/storages/schema_storage.py | 30 +++++++++++++++----- tests/common/storages/test_schema_storage.py | 24 +++++++++++++++- tests/extract/test_extract_pipe.py | 13 +++++++++ 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/dlt/common/storages/live_schema_storage.py b/dlt/common/storages/live_schema_storage.py index d3d5f14fe5..6ea921f131 100644 --- a/dlt/common/storages/live_schema_storage.py +++ b/dlt/common/storages/live_schema_storage.py @@ -24,11 +24,6 @@ def __getitem__(self, name: str) -> Schema: return schema - # def load_schema(self, name: str) -> Schema: - # self.commit_live_schema(name) - # # now live schema is saved so we can load it with the changes - # return super().load_schema(name) - def save_schema(self, schema: Schema) -> str: rv = super().save_schema(schema) # update the live schema with schema being saved, if no live schema exist, create one to be available for a getter @@ -45,8 +40,10 @@ def save_import_schema_if_not_exists(self, schema: Schema) -> None: try: self._load_import_schema(schema.name) except FileNotFoundError: - # save import schema only if it not exist - self._export_schema(schema, self.config.import_schema_path) + # save import schema only if it does not exist + self._export_schema( + schema, self.config.import_schema_path, remove_processing_hints=True + ) def commit_live_schema(self, name: str) -> Schema: # if live schema exists and is modified then it must be used as an import schema diff --git a/dlt/common/storages/schema_storage.py b/dlt/common/storages/schema_storage.py index 4745d50dcc..f86918f808 100644 --- a/dlt/common/storages/schema_storage.py +++ b/dlt/common/storages/schema_storage.py @@ -55,6 +55,14 @@ def load_schema(self, name: str) -> Schema: return Schema.from_dict(storage_schema) def save_schema(self, schema: Schema) -> str: + """Saves schema to the storage and returns the path relative to storage. + + If import schema path is configured and import schema with schena.name exits, it + will be linked to `schema` via `_imported_version_hash`. Such hash is used in `load_schema` to + detect if import schema changed and thus to overwrite the storage schema. + + If export schema path is configured, `schema` will be exported to it. 
+ """ # check if there's schema to import if self.config.import_schema_path: try: @@ -103,9 +111,9 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> rv_schema: Schema = None try: imported_schema = self._load_import_schema(name) + rv_schema = Schema.from_dict(imported_schema) if storage_schema is None: # import schema when no schema in storage - rv_schema = Schema.from_dict(imported_schema) # if schema was imported, overwrite storage schema rv_schema._imported_version_hash = rv_schema.version_hash self._save_schema(rv_schema) @@ -117,7 +125,6 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> else: # import schema when imported schema was modified from the last import sc = Schema.from_dict(storage_schema) - rv_schema = Schema.from_dict(imported_schema) if rv_schema.version_hash != sc._imported_version_hash: # use imported schema but version must be bumped and imported hash set rv_schema._stored_version = sc.stored_version + 1 @@ -153,14 +160,18 @@ def _load_import_schema(self, name: str) -> DictStrAny: import_storage.load(schema_file), self.config.external_schema_format ) - def _export_schema(self, schema: Schema, export_path: str) -> None: + def _export_schema( + self, schema: Schema, export_path: str, remove_processing_hints: bool = False + ) -> None: if self.config.external_schema_format == "json": exported_schema_s = schema.to_pretty_json( - remove_defaults=self.config.external_schema_format_remove_defaults + remove_defaults=self.config.external_schema_format_remove_defaults, + remove_processing_hints=remove_processing_hints, ) elif self.config.external_schema_format == "yaml": exported_schema_s = schema.to_pretty_yaml( - remove_defaults=self.config.external_schema_format_remove_defaults + remove_defaults=self.config.external_schema_format_remove_defaults, + remove_processing_hints=remove_processing_hints, ) else: raise ValueError(self.config.external_schema_format) @@ -180,14 +191,19 @@ def _save_schema(self, schema: Schema) -> str: @staticmethod def load_schema_file( - path: str, name: str, extensions: Tuple[TSchemaFileFormat, ...] = SchemaFileExtensions + path: str, + name: str, + extensions: Tuple[TSchemaFileFormat, ...] 
= SchemaFileExtensions, + remove_processing_hints: bool = False, ) -> Schema: storage = FileStorage(path) for extension in extensions: file = SchemaStorage._file_name_in_store(name, extension) if storage.has_file(file): parsed_schema = SchemaStorage._parse_schema_str(storage.load(file), extension) - schema = Schema.from_dict(parsed_schema) + schema = Schema.from_dict( + parsed_schema, remove_processing_hints=remove_processing_hints + ) if schema.name != name: raise UnexpectedSchemaName(name, path, schema.name) return schema diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py index 0e04554649..65272fab46 100644 --- a/tests/common/storages/test_schema_storage.py +++ b/tests/common/storages/test_schema_storage.py @@ -4,7 +4,7 @@ import yaml from dlt.common import json -from dlt.common.normalizers import explicit_normalizers +from dlt.common.normalizers.utils import explicit_normalizers from dlt.common.schema.schema import Schema from dlt.common.schema.typing import TStoredSchema from dlt.common.storages.exceptions import ( @@ -309,6 +309,28 @@ def test_schema_from_file() -> None: ) +def test_save_initial_import_schema(ie_storage: LiveSchemaStorage) -> None: + # no schema in regular storage + with pytest.raises(SchemaNotFoundError): + ie_storage.load_schema("ethereum") + + # save initial import schema where processing hints are removed + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + schema = Schema.from_dict(eth_V9) + ie_storage.save_import_schema_if_not_exists(schema) + # should be available now + eth = ie_storage.load_schema("ethereum") + assert "x-normalizer" not in eth.tables["blocks"] + + # won't overwrite initial schema + del eth_V9["tables"]["blocks__uncles"] + schema = Schema.from_dict(eth_V9) + ie_storage.save_import_schema_if_not_exists(schema) + # should be available now + eth = ie_storage.load_schema("ethereum") + assert "blocks__uncles" in eth.tables + + # def test_save_empty_schema_name(storage: SchemaStorage) -> None: # schema = Schema("") # schema.settings["schema_sealed"] = True diff --git a/tests/extract/test_extract_pipe.py b/tests/extract/test_extract_pipe.py index 68c1c82124..fd0d210ebd 100644 --- a/tests/extract/test_extract_pipe.py +++ b/tests/extract/test_extract_pipe.py @@ -505,6 +505,19 @@ def test_pipe_copy_on_fork() -> None: assert elems[0].item is not elems[1].item +def test_pipe_pass_empty_list() -> None: + def _gen(): + yield [] + + pipe = Pipe.from_data("data", _gen()) + elems = list(PipeIterator.from_pipe(pipe)) + assert elems[0].item == [] + + pipe = Pipe.from_data("data", [[]]) + elems = list(PipeIterator.from_pipe(pipe)) + assert elems[0].item == [] + + def test_clone_single_pipe() -> None: doc = {"e": 1, "l": 2} parent = Pipe.from_data("data", [doc]) From 2a7c5dd049704376fd8db2f581269147189237bb Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 18 Mar 2024 14:38:37 +0100 Subject: [PATCH 034/105] adds docs on naming conventions, removes technical docs --- docs/technical/README.md | 10 - docs/technical/create_pipeline.md | 441 ------------------ docs/technical/customization_and_hacking.md | 60 --- docs/technical/general_usage.md | 38 -- docs/technical/working_with_schemas.md | 178 ------- .../docs/general-usage/naming-convention.md | 44 ++ 6 files changed, 44 insertions(+), 727 deletions(-) delete mode 100644 docs/technical/README.md delete mode 100644 docs/technical/create_pipeline.md delete mode 100644 docs/technical/customization_and_hacking.md create mode 100644 
docs/website/docs/general-usage/naming-convention.md diff --git a/docs/technical/README.md b/docs/technical/README.md deleted file mode 100644 index 6e2b5048a8..0000000000 --- a/docs/technical/README.md +++ /dev/null @@ -1,10 +0,0 @@ -## Finished documents - -1. [general_usage.md](general_usage.md) -2. [create_pipeline.md](create_pipeline.md) -3. [secrets_and_config.md](secrets_and_config.md) -4. [working_with_schemas.md](working_with_schemas.md) - -## In progress - -5. [customization_and_hacking.md](customization_and_hacking.md) diff --git a/docs/technical/create_pipeline.md b/docs/technical/create_pipeline.md deleted file mode 100644 index f6603d08b8..0000000000 --- a/docs/technical/create_pipeline.md +++ /dev/null @@ -1,441 +0,0 @@ -# Create Pipeline -marks features that are: - -⛔ not implemented, hard to add - -☮️ not implemented, easy to add - - -## Example from `dlt` module docstring -It is possible to create "intuitive" pipeline just by providing a list of objects to `dlt.run` methods No decorators and secret files, configurations are necessary. - -```python -import dlt -from dlt.sources.helpers import requests - -dlt.run( - requests.get("https://api.chess.com/pub/player/magnuscarlsen/games/2022/11").json()["games"], - destination="duckdb", - table_name="magnus_games" -) -``` - -Run your pipeline script -`$ python magnus_games.py` - -See and query your data with autogenerated Streamlit app -`$ dlt pipeline dlt_magnus_games show` - -## Source extractor function the preferred way -General guidelines: -1. the source extractor is a function decorated with `@dlt.source`. that function **yields** or **returns** a list of resources. -2. resources are generator functions that always **yield** data (enforced by exception which I hope is user friendly). Access to external endpoints, databases etc. should happen from that generator function. Generator functions may be decorated with `@dlt.resource` to provide alternative names, write disposition etc. -3. resource generator functions can be OFC parametrized and resources may be created dynamically -4. the resource generator function may yield **anything that is json serializable**. we prefer to yield _dict_ or list of dicts. -> yielding lists is much more efficient in terms of processing! -5. like any other iterator, the @dlt.source and @dlt.resource **can be iterated and thus extracted and loaded only once**, see example below. - -**Remarks:** - -1. the **@dlt.resource** let's you define the table schema hints: `name`, `write_disposition`, `columns` -2. the **@dlt.source** let's you define global schema props: `name` (which is also source name), `schema` which is Schema object if explicit schema is provided `nesting` to set nesting level etc. -3. decorators can also be used as functions ie in case of dlt.resource and `lazy_function` (see examples) - -```python -endpoints = ["songs", "playlist", "albums"] -# return list of resourced -return [dlt.resource(lazy_function(endpoint, name=endpoint) for endpoint in endpoints)] - -``` - -### Extracting data -Source function is not meant to extract the data, but in many cases getting some metadata ie. to generate dynamic resources (like in case of google sheets example) is unavoidable. The source function's body is evaluated **outside** the pipeline `run` (if `dlt.source` is a generator, it is immediately consumed). - -Actual extraction of the data should happen inside the `dlt.resource` which is lazily executed inside the `dlt` pipeline. 
- -> both a `dlt` source and resource are regular Python iterators and can be passed to any python function that accepts them ie to `list`. `dlt` will evaluate such iterators, also parallel and async ones and provide mock state to it. - -## Multiple resources and resource selection when loading -The source extraction function may contain multiple resources. The resources can be defined as multiple resource functions or created dynamically ie. with parametrized generators. -The user of the pipeline can check what resources are available and select the resources to load. - - -**each resource has a a separate resource function** -```python -from dlt.sources.helpers import requests -import dlt - -@dlt.source -def hubspot(...): - - @dlt.resource(write_disposition="replace") - def users(): - # calls to API happens here - ... - yield users - - @dlt.resource(write_disposition="append") - def transactions(): - ... - yield transactions - - # return a list of resources - return users, transactions - -# load all resources -taktile_data(1).run(destination=bigquery) -# load only decisions -taktile_data(1).with_resources("decisions").run(....) - -# alternative form: -source = taktile_data(1) -# select only decisions to be loaded -source.resources.select("decisions") -# see what is selected -print(source.selected_resources) -# same as this -print(source.resources.selected) -``` - -Except being accessible via `source.resources` dictionary, **every resource is available as an attribute of the source**. For the example above -```python -print(list(source.decisions)) # will iterate decisions resource -source.logs.selected = False # deselect resource -``` - -## Resources may be created dynamically -Here we implement a single parametrized function that **yields** data and we call it repeatedly. Mind that the function body won't be executed immediately, only later when generator is consumed in extract stage. - -```python - -@dlt.source -def spotify(): - - endpoints = ["songs", "playlists", "albums"] - - def get_resource(endpoint): - # here we yield the whole response - yield requests.get(url + "/" + endpoint).json() - - # here we yield resources because this produces cleaner code - for endpoint in endpoints: - # calling get_resource creates generator, the actual code of the function will be executed in extractor - yield dlt.resource(get_resource(endpoint), name=endpoint) - -``` - -## Unbound (parametrized) resources -Imagine the situation in which you have a resource for which you want (or require) user to pass some options ie. the number of records returned. - -> try it, it is ⚡ powerful - -1. In all examples above you do that via the source and returned resources are not parametrized. -OR -2. You can return a **parametrized (unbound)** resources from the source. - -```python - -@dlt.source -def chess(chess_api_url): - - # let people choose player title, the default is grand master - @dlt.resource - def players(title_filter="GM", max_results=10): - yield - - # ❗ return the players without the calling - return players - -s = chess("url") -# let's parametrize the resource to select masters. you simply call `bind` method on the resource to bind it -# if you do not bind it, the default values are used -s.players.bind("M", max_results=1000) -# load the masters -s.run() - -``` - -## A standalone @resource -A general purpose resource (ie. jsonl reader, generic sql query reader etc.) that you want to add to any of your sources or multiple instances of it to your pipelines? -Yeah definitely possible. 
Just replace `@source` with `@resource` decorator. - -```python -@dlt.resource(name="logs", write_disposition="append") -def taktile_data(initial_log_id, taktile_api_key=dlt.secret.value): - - # yes, this will also work but data will be obtained immediately when taktile_data() is called. - resp = requests.get( - "https://taktile.com/api/v2/logs?from_log_id=%i" % initial_log_id, - headers={"Authorization": taktile_api_key}) - resp.raise_for_status() - for item in resp.json()["result"]: - yield item - -# this will load the resource into default schema. see `general_usage.md) -dlt.run(source=taktile_data(1), destination=bigquery) - -``` -How standalone resource works: -1. It can be used like a source that contains only one resource (ie. single endpoint) -2. The main difference is that when extracted it will join the default schema in the pipeline (or explicitly passed schema) -3. It can be called from a `@source` function and then it becomes a resource of that source and joins the source schema - -## `dlt` state availability - -The state is a python dictionary-like object that is available within the `@dlt.source` and `@dlt.resource` decorated functions and may be read and written to. -The data within the state is loaded into destination together with any other extracted data and made automatically available to the source/resource extractor functions when they are run next time. -When using the state: -* Any JSON-serializable values can be written and the read from the state. -* The state available in the `dlt source` is read only and any changes will be discarded. Still it may be used to initialize the resources. -* The state available in the `dlt resource` is writable and written values will be available only once - -### State sharing and isolation across sources - -1. Each source and resources **in the same Python module** (no matter if they are standalone, inner or created dynamically) share the same state dictionary and is separated from other sources -2. Source accepts `section` argument which creates a separate state for that resource (and separate configuration as well). All sources with the same `section` share the state. -2. All the standalone resources and generators that do not belong to any source share the same state when being extracted (they are extracted withing ad-hoc created source) - -## Stream resources: dispatching data to several tables from single resources -What about resource like rasa tracker or singer tap that send a stream of events that should be routed to different tables? we have an answer (actually two): -1. in many cases the table name is based on the data item content (ie. you dispatch events of given type to different tables by event type). We can pass a function that takes the data item as input and returns table name. -```python -# send item to a table with name item["type"] -@dlt.resource(table_name=lambda i: i['type']) -def repo_events() -> Iterator[TDataItems]: - yield item -``` - -2. You can mark the yielded data with a table name (`dlt.mark.with_table_name`). This gives you full control on the name of the table - -see [here](docs/examples/sources/rasa/rasa.py) and [here](docs/examples/sources/singer_tap.py). - -## Source / resource config sections and arguments injection -You should read [secrets_and_config](secrets_and_config.md) now to understand how configs and credentials are passed to the decorated functions and how the users of them can configure their projects. 
- -Also look at the following [test](/tests/extract/test_decorators.py) : `test_source_sections` - -## Example sources and resources - -### With inner resource function -Resource functions can be placed inside the source extractor function. That lets them get access to source function input arguments and all the computations within the source function via so called closure. - -```python -from dlt.sources.helpers import requests -import dlt - -# the `dlt.source` tell the library that the decorated function is a source -# it will use function name `taktile_data` to name the source and the generated schema by default -# in general `@source` should **return** a list of resources or list of generators (function that yield data) -# @source may also **yield** resources or generators - if yielding is more convenient -# if @source returns or yields data - this will generate exception with a proper explanation. dlt user can always load the data directly without any decorators like in the previous example! -@dlt.source -def taktile_data(initial_log_id, taktile_api_key=dlt.secret.value): - - # the `dlt.resource` tells the `dlt.source` that the function defines a resource - # will use function name `logs` as resource/table name by default - # the function should **yield** the data items one by one or **yield** a list. - # here the decorator is optional: there are no parameters to `dlt.resource` - @dlt.resource - def logs(): - resp = requests.get( - "https://taktile.com/api/v2/logs?from_log_id=%i" % initial_log_id, - headers={"Authorization": taktile_api_key}) - resp.raise_for_status() - # option 1: yield the whole list - yield resp.json()["result"] - # or -> this is useful if you deal with a stream of data and for that you need an API that supports that, for example you could yield lists containing paginated results - for item in resp.json()["result"]: - yield item - - # as mentioned we return a resource or a list of resources - return logs - # this will also work - # return logs() -``` - -### With outer generator yielding data, and @resource created dynamically -```python - -def taktile_logs_data(initial_log_id, taktile_api_key=dlt.secret.value) - yield data - - -@dlt.source -def taktile_data(initial_log_id, taktile_api_key): - # pass the arguments and convert to resource - return dlt.resource(taktile_logs_data(initial_log_id, taktile_api_key), name="logs", write_disposition="append") -``` - -### A source with resources defined elsewhere -Example of the above -```python -from taktile.resources import logs - -@dlt.source -def taktile_data(initial_log_id, taktile_api_key=dlt.secret.value): - return logs(initial_log_id, taktile_api_key) -``` - -## Advanced Topics - -### Transformers ⚡ -This happens all the time: -1. We have an endpoint that returns a list of users and then we must get each profile with a separate call. -2. The situation above is getting even more complicated when we need that list in two places in our source ie. we want to get the profiles but also a list of transactions per user. - -Ideally we would obtain the list only once and then call and yield from the profiles and transactions endpoint in parallel so the extraction time is minimized. - -Here's example how to do that: [run resources and transformers in parallel threads](/docs/examples/chess/chess.py) and test named `test_evolve_schema` - -More on transformers: -1. you can have unbound (parametrized) transformers as well -2. 
you can use pipe '|' operator to pipe data from resources to transformers instead of binding them statically with `data_from`. -> see our [singer tap](/docs/examples/singer_tap_jsonl_example.py) example where we pipe a stream of document from `jsonl` into `raw_singer_tap` which is a standalone, unbound ⚡ transformer. -3. If transformer yields just one element you can `return` it instead. This allows you to apply the `retry` and `defer` (parallel execution) decorators directly to it. - -#### Transformer example - -Here we have a list of huge documents and we want to load into several tables. - -```python -@dlt.source -def spotify(): - - # deselect by default, we do not want to load the huge doc - @dlt.resource(selected=False) - def get_huge_doc(): - return requests.get(...) - - # make songs and playlists to be dependent on get_huge_doc - @dlt.transformer(data_from=get_huge_doc) - def songs(huge_doc): - yield huge_doc["songs"] - - @dlt.transformer(data_from=get_huge_doc) - def playlists(huge_doc): - yield huge_doc["playlists"] - - # as you can see the get_huge_doc is not even returned, nevertheless it will be evaluated (only once) - # the huge doc will not be extracted and loaded - return songs, playlists - # we could also use the pipe operator, intead of providing_data from - # return get_huge_doc | songs, get_huge_doc | playlists -``` - -## Data item transformations - -You can attach any number of transformations to your resource that are evaluated on item per item basis. The available transformation types: -* map - transform the data item -* filter - filter the data item -* yield map - a map that returns iterator (so single row may generate many rows) - -You can add and insert transformations on the `DltResource` object (ie. decorated function) -* resource.add_map -* resource.add_filter -* resource.add_yield_map - -> Transformations always deal with single items even if you return lists. - -You can add transformations to a resource (also within a source) **after it is created**. This allows to customize existing pipelines. The transformations may -be distributed with the pipeline or written ad hoc in pipeline script. -```python -# anonymize creates nice deterministic hash for any hashable data type (not implemented yet:) -from dlt.helpers import anonymize - -# example transformation provided by the user -def anonymize_user(user_data): - user_data["user_id"] = anonymize(user_data["user_id"]) - user_data["user_email"] = anonymize(user_data["user_email"]) - return user_data - -@dlt.source -def pipedrive(...): - ... - - @dlt.resource(write_disposition="replace") - def users(): - ... - users = requests.get(...) - ... - yield users - - return users, deals, customers -``` - -in pipeline script: -1. we want to remove user with id == "me" -2. we want to anonymize user data -3. we want to pivot `user_props` into KV table - -```python -from pipedrive import pipedrive, anonymize_user - -source = pipedrive() -# access resource in the source by name and add filter and map transformation -source.users.add_filter(lambda user: user["user_id"] != "me").add_map(anonymize_user) -# now we want to yield user props to separate table. 
we define our own generator function -def pivot_props(user): - # keep user - yield user - # yield user props to user_props table - yield from [ - dlt.mark.with_table_name({"user_id": user["user_id"], "name": k, "value": v}, "user_props") for k, v in user["props"] - ] - -source.user.add_yield_map(pivot_props) -pipeline.run(source) -``` - -We provide a library of various concrete transformations: - -* ☮️ a recursive versions of the map, filter and flat map which can be applied to any nesting level of the data item (the standard transformations work on recursion level 0). Possible applications - - ☮️ recursive rename of dict keys - - ☮️ converting all values to strings - - etc. - -## Some CS Theory - -### The power of decorators - -With decorators dlt can inspect and modify the code being decorated. -1. it knows what are the sources and resources without running them -2. it knows input arguments so it knows the config values and secret values (see `secrets_and_config`). with those we can generate deployments automatically -3. it can inject config and secret values automatically -4. it wraps the functions into objects that provide additional functionalities -- sources and resources are iterators so you can write -```python -items = list(source()) - -for item in source()["logs"]: - ... -``` -- you can select which resources to load with `source().select(*names)` -- you can add mappings and filters to resources - -### The power of yielding: The preferred way to write resources - -The Python function that yields is not a function but magical object that `dlt` can control: -1. it is not executed when you call it! the call just creates a generator (see below). in the example above `taktile_data(1)` will not execute the code inside, it will just return an object composed of function code and input parameters. dlt has control over the object and can execute the code later. this is called `lazy execution` -2. i can control when and how much of the code is executed. the function that yields typically looks like that - -```python -def lazy_function(endpoint_name): - # INIT - this will be executed only once when dlt wants! - get_configuration() - from_item = dlt.current.state.get("last_item", 0) - l = get_item_list_from_api(api_key, endpoint_name) - - # ITERATOR - this will be executed many times also when dlt wants more data! - for item in l: - yield requests.get(url, api_key, "%s?id=%s" % (endpoint_name, item["id"])).json() - # CLEANUP - # this will be executed only once after the last item was yielded! - dlt.current.state["last_item"] = item["id"] -``` - -3. dlt will execute this generator in extractor. the whole execution is atomic (including writing to state). if anything fails with exception the whole extract function fails. -4. the execution can be parallelized by using a decorator or a simple modifier function ie: -```python -for item in l: - yield deferred(requests.get(url, api_key, "%s?id=%s" % (endpoint_name, item["id"])).json()) -``` \ No newline at end of file diff --git a/docs/technical/customization_and_hacking.md b/docs/technical/customization_and_hacking.md deleted file mode 100644 index 6dad02068c..0000000000 --- a/docs/technical/customization_and_hacking.md +++ /dev/null @@ -1,60 +0,0 @@ -# Customization - -Customizations allow the user to change `dlt` behaviour without modifying the source code (which we call `hacking` 😄) Most of the customizations require writing python on yaml snipppets. 
- -⛔ not implemented, hard to add - -☮️ not implemented, easy to add - -# in schema file - -## global settings -- default column hints, types -- column propagation -- max nesting -- choose type autodetectors -- ⛔ add custom type autodetectors - more powerful than you think - - -## table/column settings -- table and column hints -- include and exclude filters -- ⛔ last value as decorator for common cases (ie. jsonpath + max operator + automatic filtering of the results) - -# source and resource creation -when you implement new source/resource - -## source -- providing custom schema via file -- providing custom schema in the code + decorator -- providing the nesting level via decorator - -## resource -- providing table schema via hints (that includes the column definitions and column hints) -- resources may be parametrized (generators!) -- transformers also may be prametrized! (tutorial in progress) -- yielding metadata with the data items -- yielding custom data (ie. panda frames) (yes but last lambda must convert it to ) - -## extraction -- [retry with the decorator](/docs/examples/chess/chess.py) -- [run resources and transformers in parallel threads](/docs/examples/chess/chess.py) and test named `test_evolve_schema` -- run async resources and transformers - -# source and resource modifications -- resource selection - -## modification of sources and resources after they are created -must be done before passing to `run` method. - -- adding custom resources and transformers to the pipeline after it is created -- easy change the table name for a resource (currently the whole template must be changed) -- ☮️ adding stateles lambdas (row transformations) to the resources: map, filter, flat_map (reverse pivot) -- ☮️ adding stateful lambdas (row transformations with the write access to pipeline state) -- change the source name - - -# pipeline callbacks and hooks -those are not implemented -https://github.com/dlt-hub/dlt/issues/63 - diff --git a/docs/technical/general_usage.md b/docs/technical/general_usage.md index efbb2fd506..7ce8a91b46 100644 --- a/docs/technical/general_usage.md +++ b/docs/technical/general_usage.md @@ -181,44 +181,6 @@ The `run`, `extract`, `normalize` and `load` method raise `PipelineStepFailed` w > should we add it? I have a runner in `dlt` that would be easy to modify -## the `Pipeline` object -There are many ways to create or get current pipeline object. -```python - -# create and get default pipeline -p1 = dlt.pipeline() -# create explicitly configured pipeline -p2 = dlt.pipeline(pipeline_name="pipe", destination=bigquery) -# get recently created pipeline -assert dlt.pipeline() is p2 -# load data with recently created pipeline -assert dlt.run(taktile_data()) is p2 -assert taktile_data().run() is p2 - -``` - -The `Pipeline` object provides following functionalities: -1. `run`, `extract`, `normalize` and `load` methods -2. a `pipeline.schema` dictionary-like object to enumerate and get the schemas in pipeline -3. schema get with `pipeline.schemas[name]` is a live object: any modification to it is automatically applied to the pipeline with the next `run`, `load` etc. see [working_with_schemas.md](working_with_schemas.md) -4. it returns `sql_client` and `native_client` to get direct access to the destination (if destination supports SQL - currently all of them do) -5. 
it has several methods to inspect the pipeline state and I think those should be exposed via `dlt pipeline` CLI - -for example: -- list the extracted files if any -- list the load packages ready to load -- list the failed jobs in package -- show info on destination: what are the datasets, the current load_id, the current schema etc. - - -## Examples -[we have some here](/docs/examples/) - -## command line interface - - -## logging -I need your input for user friendly logging. What should we log? What is important to see? ## pipeline runtime setup diff --git a/docs/technical/working_with_schemas.md b/docs/technical/working_with_schemas.md index d94edb8727..532f0e5a1d 100644 --- a/docs/technical/working_with_schemas.md +++ b/docs/technical/working_with_schemas.md @@ -1,134 +1,7 @@ -## General approach to define schemas -marks features that are: - -⛔ not implemented, hard to add - -☮️ not implemented, easy to add - -## Schema components - -### Schema content hash and version -Each schema file contains content based hash `version_hash` that is used to -1. detect manual changes to schema (ie. user edits content) -2. detect if the destination database schema is synchronized with the file schema - -Each time the schema is saved, the version hash is updated. - -Each schema contains also numeric version which increases automatically whenever schema is updated and saved. This version is mostly for informative purposes and there are cases where the increasing order will be lost. - -> Schema in the database is only updated if its hash is not stored in `_dlt_versions` table. In principle many pipelines may send data to a single dataset. If table name clash then a single table with the union of the columns will be created. If columns clash and they have different types etc. then the load will fail. - -### ❗ Normalizer and naming convention - -The parent table is created from all top level fields, if field are dictionaries they will be flattened. **all the key names will be converted with the configured naming convention**. The current naming convention -1. converts to snake_case, small caps. removes all ascii characters except alphanum and underscore -2. add `_` if name starts with number -3. multiples of `_` are converted into single `_` -4. the parent-child relation is expressed as double `_` in names. - -The nested lists will be converted into child tables. - -The data normalizer and the naming convention are part of the schema configuration. In principle the source can set own naming convention or json unpacking mechanism. Or user can overwrite those in `config.toml` - -> The table and column names are mapped automatically. **you cannot rename the columns or tables by changing the `name` property - you must rename your source documents** - -> if you provide any schema elements that contain identifiers via decorators or arguments (ie. `table_name` or `columns`) all the names used will be converted via the naming convention when adding to the schema. For example if you execute `dlt.run(... table_name="CamelCase")` the data will be loaded into `camel_case` - -> 💡 use simple, short small caps identifiers for everything! - -☠️ not implemented! - -⛔ The schema holds lineage information (from json paths to tables/columns) and (1) automatically adapts to destination limits ie. postgres 64 chars by recomputing all names (2) let's user to change the naming convention ie. to verbatim naming convention of `duckdb` where everything is allowed as identifier. - -⛔ Any naming convention generates name clashes. 
`dlt` detects and fixes name clashes using lineage information - - -#### JSON normalizer settings -Yes those are part of the normalizer module and can be plugged in. -1. column propagation from parent to child tables -2. nesting level - -```yaml -normalizers: - names: dlt.common.normalizers.names.snake_case - json: - module: dlt.common.normalizers.json.relational - config: - max_nesting: 5 - propagation: - # for all root tables - root: - # propagate root dlt id - _dlt_id: _dlt_root_id - tables: - # for particular tables - blocks: - # propagate timestamp as block_timestamp to child tables - timestamp: block_timestamp - hash: block_hash -``` - -## Data types -"text", "double", "bool", "timestamp", "bigint", "binary", "complex", "decimal", "wei" -⛔ you cannot specify scale and precision for bigint, binary, text and decimal - -☮️ there's no time and date type - -wei is a datatype that tries to best represent native Ethereum 256bit integers and fixed point decimals. it works correcly on postgres and bigquery ## Schema settings The `settings` section of schema let's you define various global rules that impact how tables and columns are inferred from data. -> 💡 it is the best practice to use those instead of providing the exact column schemas via `columns` argument or by pasting them in `yaml`. Any ideas for improvements? tell me. - -### Column hint rules -You can define a global rules that will apply hints to a newly inferred columns. Those rules apply to normalized column names. You can use column names directly or with regular expressions. ❗ when lineages are implemented the regular expressions will apply to lineages not to column names. - -Example from ethereum schema -```yaml -settings: - default_hints: - foreign_key: - - _dlt_parent_id - not_null: - - re:^_dlt_id$ - - _dlt_root_id - - _dlt_parent_id - - _dlt_list_idx - unique: - - _dlt_id - cluster: - - block_hash - partition: - - block_timestamp -``` - -### Preferred data types -You can define rules that will set the data type for newly created columns. Put the rules under `preferred_types` key of `settings`. On the left side there's a rule on a column name, on the right side is the data type. ❗See the column hint rules for naming convention! - -Example: -```yaml -settings: - preferred_types: - timestamp: timestamp - re:^inserted_at$: timestamp - re:^created_at$: timestamp - re:^updated_at$: timestamp - re:^_dlt_list_idx$: bigint -``` - -### data type autodetectors -You can define a set of functions that will be used to infer the data type of the column from a value. The functions are run from top to bottom on the lists. Look in `detections.py` to see what is available. -```yaml -settings: - detections: - - timestamp - - iso_timestamp - - iso_date -``` - -⛔ we may define `all_text` function that will generate string only schemas by telling `dlt` that all types should be coerced to strings. - ### Table exclude and include filters You can define the include and exclude filters on tables but you are much better off transforming and filtering your source data in python. The current implementation is both weird and quite powerful. In essence you can exclude columns and whole tables with regular expressions to which the inputs are normalized lineages of the values. Example @@ -191,54 +64,3 @@ p.run() ``` > The `normalize` stage creates standalone load packages each containing data and schema with particular version. Those packages are of course not impacted by the "live" schema changes. 
- -## Attaching schemas to sources -The general approach when creating a new pipeline is to setup a few global schema settings and then let the table and column schemas to be generated from the resource hints and data itself. - -> ⛔ I do not have any cool "schema builder" api yet to see the global settings. - -The `dlt.source` decorator accepts a schema instance that you can create yourself and whatever you want. It also support a few typical use cases: - -### Schema created implicitly by decorator -If no schema instance is passed, the decorator creates a schema with the name set to source name and all the settings to default. - -### Automatically load schema file stored with source python module -If no schema instance is passed, and a file with a name `{source name}_schema.yml` exists in the same folder as the module with the decorated function, it will be automatically loaded and used as the schema. - -This should make easier to bundle a fully specified (or non trivially configured) schema with a source. - -### Schema is modified in the source function body -What if you can configure your schema or add some tables only inside your schema function, when ie. you have the source credentials and user settings? You could for example add detailed schemas of all the database tables when someone requests a table data to be loaded. This information is available only at the moment source function is called. - -Similarly to the `state`, source and resource function has current schema available via `dlt.current.source_schema` - -Example: - -```python - -# apply schema to the source -@dlt.source -def createx(nesting_level: int): - - schema = dlt.current.source_schema() - - # get default normalizer config - normalizer_conf = dlt.schema.normalizer_config() - # set hash names convention which produces short names without clashes but very ugly - if short_names_convention: - normalizer_conf["names"] = dlt.common.normalizers.names.hash_names - - # apply normalizer conf - schema = Schema("createx", normalizer_conf) - # set nesting level, yeah it's ugly - schema._normalizers_config["json"].setdefault("config", {})["max_nesting"] = nesting_level - # remove date detector and add type detector that forces all fields to strings - schema._settings["detections"].remove("iso_timestamp") - schema._settings["detections"].insert(0, "all_text") - schema.compile_settings() - - return dlt.resource(...) - -``` - -Also look at the following [test](/tests/extract/test_decorators.py) : `test_source_schema_context` diff --git a/docs/website/docs/general-usage/naming-convention.md b/docs/website/docs/general-usage/naming-convention.md new file mode 100644 index 0000000000..bb9eae9c89 --- /dev/null +++ b/docs/website/docs/general-usage/naming-convention.md @@ -0,0 +1,44 @@ +--- +title: Naming Convention +description: Control how dlt creates table, column and other identifiers +keywords: [identifiers, snake case, ] +--- + +# Naming Convention +`dlt` creates tables, child tables and column schemas from the data. The data being loaded, +typically JSON documents, contains identifiers (i.e. key names in a dictionary) with any Unicode +characters, any lengths and naming styles. On the other hand the destinations accept very strict +namespaces for their identifiers. Like Redshift that accepts case-insensitive alphanumeric +identifiers with maximum 127 characters. + +Each schema contains `naming convention` that tells `dlt` how to translate identifiers to the +namespace that the destination understands. 
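+
+A minimal sketch of this translation (illustrative only; it uses the built-in `snake_case`
+convention from `dlt.common.normalizers.naming.snake_case` and example identifiers):
+
+```python
+from dlt.common.normalizers.naming.snake_case import NamingConvention
+
+# cap identifier length, e.g. at Redshift's 127 character limit
+naming = NamingConvention(max_length=127)
+
+naming.normalize_identifier("CamelCase")  # -> 'camel_case'
+naming.make_path("issues", "labels")      # -> 'issues__labels' (child table name)
+```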
+ + + +## Default naming convention (snake_case) + +1. Converts identifiers to **snake_case**, small caps. Removes all ascii characters except ascii + alphanumerics and underscores. +1. Adds `_` if name starts with number. +1. Multiples of `_` are converted into single `_`. +1. The parent-child relation is expressed as double `_` in names. +1. It shorts the identifier if it exceed the length at the destination. + +> 💡 Standard behavior of `dlt` is to **use the same naming convention for all destinations** so +> users see always the same tables and columns in their databases. + +> 💡 If you provide any schema elements that contain identifiers via decorators or arguments (i.e. +> `table_name` or `columns`) all the names used will be converted via the naming convention when +> adding to the schema. For example if you execute `dlt.run(... table_name="CamelCase")` the data +> will be loaded into `camel_case`. + +> 💡 Use simple, short small caps identifiers for everything! + + +## Change naming convention + +The naming convention is configurable and users can easily create their own +conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e. +DuckDB). + From 3bb929f3bec4efa49bfbbbcf560570202d0835c9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:29:41 +0200 Subject: [PATCH 035/105] adds casing info to databrick caps, makes caps an instance attr --- dlt/destinations/impl/databricks/__init__.py | 3 +++ dlt/destinations/impl/databricks/databricks.py | 13 +++++++++---- dlt/destinations/impl/databricks/sql_client.py | 18 ++++++++++-------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/dlt/destinations/impl/databricks/__init__.py b/dlt/destinations/impl/databricks/__init__.py index 0784335196..e07075b960 100644 --- a/dlt/destinations/impl/databricks/__init__.py +++ b/dlt/destinations/impl/databricks/__init__.py @@ -10,7 +10,10 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_databricks_identifier + # databricks identifiers are case insensitive and stored in lower case + # https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html caps.escape_literal = escape_databricks_literal + caps.casefold_identifier = str.lower caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) diff --git a/dlt/destinations/impl/databricks/databricks.py b/dlt/destinations/impl/databricks/databricks.py index b7f601535e..0ddf5acf71 100644 --- a/dlt/destinations/impl/databricks/databricks.py +++ b/dlt/destinations/impl/databricks/databricks.py @@ -255,10 +255,15 @@ def gen_delete_from_sql( class DatabricksClient(InsertValuesJobClient, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: DatabricksClientConfiguration) -> None: - sql_client = DatabricksSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: DatabricksClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = DatabricksSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: DatabricksClientConfiguration = config self.sql_client: 
DatabricksSqlClient = sql_client diff --git a/dlt/destinations/impl/databricks/sql_client.py b/dlt/destinations/impl/databricks/sql_client.py index 0530fd297f..85fb423a75 100644 --- a/dlt/destinations/impl/databricks/sql_client.py +++ b/dlt/destinations/impl/databricks/sql_client.py @@ -1,5 +1,5 @@ from contextlib import contextmanager, suppress -from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence, List, Union, Dict +from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence, List, Tuple, Union, Dict from databricks import sql as databricks_lib from databricks.sql.client import ( @@ -44,10 +44,14 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: class DatabricksSqlClient(SqlClientBase[DatabricksSqlConnection], DBTransaction): dbapi: ClassVar[DBApi] = databricks_lib - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: DatabricksCredentials) -> None: - super().__init__(credentials.catalog, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: DatabricksCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.catalog, dataset_name, capabilities) self._conn: DatabricksSqlConnection = None self.credentials = credentials @@ -133,13 +137,11 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB curr.execute(query, db_args) yield DatabricksCursorImpl(curr) # type: ignore[abstract] - def fully_qualified_dataset_name(self, escape: bool = True) -> str: + def catalog_name(self, escape: bool = True) -> Optional[str]: catalog = self.capabilities.casefold_identifier(self.credentials.catalog) - dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: catalog = self.capabilities.escape_identifier(catalog) - dataset_name = self.capabilities.escape_identifier(dataset_name) - return f"{catalog}.{dataset_name}" + return catalog @staticmethod def _make_database_exception(ex: Exception) -> Exception: From 724dc159b43969d7a988c43d0afda2bbe7071a43 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:46:09 +0200 Subject: [PATCH 036/105] adjusts destination casing in caps from schema naming and config --- dlt/common/destination/reference.py | 61 ++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 9982a22cf8..d83729ea36 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -26,6 +26,7 @@ from dlt.common import logger from dlt.common.destination.utils import verify_schema_capabilities +from dlt.common.normalizers.naming import NamingConvention from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.utils import ( get_write_disposition, @@ -247,11 +248,15 @@ class DoNothingFollowupJob(DoNothingJob, FollowupJob): class JobClientBase(ABC): - capabilities: ClassVar[DestinationCapabilitiesContext] = None - - def __init__(self, schema: Schema, config: DestinationClientConfiguration) -> None: + def __init__( + self, + schema: Schema, + config: DestinationClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: self.schema = schema self.config = config + self.capabilities = capabilities @abstractmethod def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: @@ -453,14 +458,56 @@ def client_class(self) -> Type[TDestinationClient]: 
def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfig: """Get a fully resolved destination config from the initial config""" + config = resolve_configuration( - initial_config, + initial_config or self.spec(), sections=(known_sections.DESTINATION, self.destination_name), # Already populated values will supersede resolved env config explicit_value=self.config_params, ) return config + def client( + self, schema: Schema, initial_config: TDestinationConfig = None + ) -> TDestinationClient: + """Returns a configured instance of the destination's job client""" + config = self.configuration(initial_config) + caps = self.adjust_capabilities(self.capabilities(), config, schema.naming) + return self.client_class(schema, config, caps) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: TDestinationConfig, + naming: NamingConvention, + ) -> DestinationCapabilitiesContext: + """Adjust the capabilities to match the case sensitivity as requested by naming convention.""" + if not naming.is_case_sensitive: + # all destinations are configured to be case insensitive so there's nothing to adjust + return caps + if not caps.has_case_sensitive_identifiers: + if caps.casefold_identifier is str: + logger.info( + f"Naming convention {naming.name()} is case sensitive but the destination does" + " not support case sensitive identifiers. Nevertheless identifier casing will" + " be preserved in the destination schema." + ) + else: + logger.warn( + f"Naming convention {naming.name()} is case sensitive but the destination does" + " not support case sensitive identifiers. Destination will case fold all the" + f" identifiers with {caps.casefold_identifier}" + ) + else: + # adjust case folding to store casefold identifiers in the schema + if caps.casefold_identifier is not str: + caps.casefold_identifier = str + logger.info( + f"Enabling case sensitive identifiers for naming convention {naming.name()}" + ) + return caps + @staticmethod def to_name(ref: TDestinationReferenceArg) -> str: if ref is None: @@ -536,11 +583,5 @@ def from_reference( raise InvalidDestinationReference(ref) from e return dest - def client( - self, schema: Schema, initial_config: TDestinationConfig = None - ) -> TDestinationClient: - """Returns a configured instance of the destination's job client""" - return self.client_class(schema, self.configuration(initial_config)) - TDestination = Destination[DestinationClientConfiguration, JobClientBase] From b58a118eac8b030ca095f12446e15eaf2b9c6dc2 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:47:05 +0200 Subject: [PATCH 037/105] raises detailed schema identifier clash exceptions --- dlt/common/destination/utils.py | 32 +++++++++++++++++++++++--------- dlt/common/schema/exceptions.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/dlt/common/destination/utils.py b/dlt/common/destination/utils.py index 064a641fa7..8b48d9f394 100644 --- a/dlt/common/destination/utils.py +++ b/dlt/common/destination/utils.py @@ -3,7 +3,10 @@ from dlt.common import logger from dlt.common.destination.exceptions import IdentifierTooLongException from dlt.common.schema import Schema -from dlt.common.schema.exceptions import SchemaCorruptedException +from dlt.common.schema.exceptions import ( + SchemaCorruptedException, + SchemaIdentifierNormalizationClash, +) from dlt.common.schema.exceptions import SchemaException from dlt.common.schema.utils import is_complete_column from 
dlt.common.typing import DictStrStr @@ -42,6 +45,11 @@ def verify_schema_capabilities( " identifiers. You may try to change the destination capabilities by changing the" " `casefold_identifier` to `str`" ) + clash_msg += ( + ". Please clean up your data before loading so the entities have different name. You can" + " also change to case insensitive naming convention. Note that in that case data from both" + " columns will be merged into one." + ) # check for any table clashes for table in schema.data_tables(): @@ -51,11 +59,14 @@ def verify_schema_capabilities( if cased_table_name in table_name_lookup: conflict_table_name = table_name_lookup[cased_table_name] exception_log.append( - SchemaCorruptedException( + SchemaIdentifierNormalizationClash( schema.name, - f"A table name {table_name} clashes with {conflict_table_name} after" - f" normalization to {cased_table_name}. " - + clash_msg, + table_name, + "table", + table_name, + conflict_table_name, + schema.naming.name(), + clash_msg, ) ) table_name_lookup[cased_table_name] = table_name @@ -76,11 +87,14 @@ def verify_schema_capabilities( if cased_column_name in column_name_lookup: conflict_column_name = column_name_lookup[cased_column_name] exception_log.append( - SchemaCorruptedException( + SchemaIdentifierNormalizationClash( schema.name, - f"A column name {column_name} in table {table_name} clashes with" - f" {conflict_column_name} after normalization to {cased_column_name}. " - + clash_msg, + table_name, + "column", + column_name, + conflict_column_name, + schema.naming.name(), + clash_msg, ) ) column_name_lookup[cased_column_name] = column_name diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 827bea5566..283069d030 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -102,6 +102,35 @@ class SchemaCorruptedException(SchemaException): pass +class SchemaIdentifierNormalizationClash(SchemaCorruptedException): + def __init__( + self, + schema_name: str, + table_name: str, + identifier_type: str, + identifier_name: str, + conflict_identifier_name: str, + naming_name: str, + clash_msg: str, + ) -> None: + if identifier_type == "column": + table_info = f"in table {table_name} " + else: + table_info = "" + msg = ( + f"A {identifier_type} name {identifier_name} {table_info}clashes with" + f" {conflict_identifier_name} after normalization with {naming_name} naming" + " convention. 
" + + clash_msg + ) + self.table_name = table_name + self.identifier_type = identifier_type + self.identifier_name = identifier_name + self.conflict_identifier_name = conflict_identifier_name + self.naming_name = naming_name + super().__init__(schema_name, msg) + + class SchemaEngineNoUpgradePathException(SchemaException): def __init__( self, schema_name: str, init_engine: int, from_engine: int, to_engine: int From d190ea1dfa1bfc2aca9b839cb5ba71d29f2c57e9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:51:15 +0200 Subject: [PATCH 038/105] adds is_case_sensitive and name to NamingConvention --- dlt/common/normalizers/naming/direct.py | 1 - dlt/common/normalizers/naming/naming.py | 16 ++++++++++++++-- dlt/common/normalizers/naming/snake_case.py | 3 +++ tests/common/normalizers/test_naming.py | 5 +++-- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/dlt/common/normalizers/naming/direct.py b/dlt/common/normalizers/naming/direct.py index 0998650852..c164e28365 100644 --- a/dlt/common/normalizers/naming/direct.py +++ b/dlt/common/normalizers/naming/direct.py @@ -5,7 +5,6 @@ class NamingConvention(BaseNamingConvention): PATH_SEPARATOR = "▶" - _CLEANUP_TABLE = str.maketrans(".\n\r'\"▶", "______") def normalize_identifier(self, identifier: str) -> str: diff --git a/dlt/common/normalizers/naming/naming.py b/dlt/common/normalizers/naming/naming.py index 59728f2a14..eaf871d66d 100644 --- a/dlt/common/normalizers/naming/naming.py +++ b/dlt/common/normalizers/naming/naming.py @@ -3,15 +3,19 @@ from functools import lru_cache import math import hashlib -from typing import Any, List, Protocol, Sequence, Type +from typing import Sequence class NamingConvention(ABC): _TR_TABLE = bytes.maketrans(b"/+", b"ab") _DEFAULT_COLLISION_PROB = 0.001 - def __init__(self, max_length: int = None) -> None: + def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: + """Initializes naming convention producing identifiers with `max_length` and transforming input + in case sensitive or case insensitive manner. 
+ """ self.max_length = max_length + self.is_case_sensitive = is_case_sensitive @abstractmethod def normalize_identifier(self, identifier: str) -> str: @@ -58,6 +62,14 @@ def shorten_fragments(self, *normalized_idents: str) -> str: path_str = self.make_path(*normalized_idents) return self.shorten_identifier(path_str, path_str, self.max_length) + @classmethod + def name(cls) -> str: + """Naming convention name is the name of the module in which NamingConvention is defined""" + if cls.__module__.startswith("dlt.common.normalizers.naming."): + # return last component + return cls.__module__.split(".")[-1] + return cls.__module__ + @staticmethod @lru_cache(maxsize=None) def shorten_identifier( diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py index b3c65e9b8d..782c9ec781 100644 --- a/dlt/common/normalizers/naming/snake_case.py +++ b/dlt/common/normalizers/naming/snake_case.py @@ -18,6 +18,9 @@ class NamingConvention(BaseNamingConvention): # subsequent nested fields will be separated with the string below, applies both to field and table names PATH_SEPARATOR = "__" + def __init__(self, max_length: int = None, is_case_sensitive: bool = False) -> None: + super().__init__(max_length, False) + def normalize_identifier(self, identifier: str) -> str: identifier = super().normalize_identifier(identifier) # print(f"{identifier} -> {self.shorten_identifier(identifier, self.max_length)} ({self.max_length})") diff --git a/tests/common/normalizers/test_naming.py b/tests/common/normalizers/test_naming.py index 3bf4762c35..27325ab3cc 100644 --- a/tests/common/normalizers/test_naming.py +++ b/tests/common/normalizers/test_naming.py @@ -266,8 +266,9 @@ def test_shorten_fragments(convention: Type[NamingConvention]) -> None: assert naming.shorten_fragments(*RAW_PATH_WITH_EMPTY_IDENT) == norm_path -# 'event__parse_data__response_selector__default__response__response_templates' -# E 'event__parse_data__response_selector__default__response__responses' +def test_naming_convention_name() -> None: + assert SnakeCaseNamingConvention.name() == "snake_case" + assert DirectNamingConvention.name() == "direct" def assert_short_path(norm_path: str, naming: NamingConvention) -> None: From b44565421647f7652ac8307ee10ebfbe610befa6 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:52:44 +0200 Subject: [PATCH 039/105] adds sanity check if _dlt prefix is preserved --- dlt/common/schema/migrations.py | 6 +++--- dlt/common/schema/schema.py | 10 +++++++++- dlt/common/schema/utils.py | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/dlt/common/schema/migrations.py b/dlt/common/schema/migrations.py index 1b644f2514..1ef602a3f8 100644 --- a/dlt/common/schema/migrations.py +++ b/dlt/common/schema/migrations.py @@ -14,7 +14,7 @@ from dlt.common.schema.exceptions import SchemaEngineNoUpgradePathException from dlt.common.normalizers.utils import import_normalizers -from dlt.common.schema.utils import new_table, version_table, load_table +from dlt.common.schema.utils import new_table, version_table, loads_table def migrate_schema(schema_dict: DictStrAny, from_engine: int, to_engine: int) -> TStoredSchema: @@ -92,11 +92,11 @@ def migrate_filters(group: str, filters: List[str]) -> None: if from_engine == 4 and to_engine > 4: # replace schema versions table schema_dict["tables"][VERSION_TABLE_NAME] = version_table() - schema_dict["tables"][LOADS_TABLE_NAME] = load_table() + schema_dict["tables"][LOADS_TABLE_NAME] = loads_table() from_engine = 5 if 
from_engine == 5 and to_engine > 5: # replace loads table - schema_dict["tables"][LOADS_TABLE_NAME] = load_table() + schema_dict["tables"][LOADS_TABLE_NAME] = loads_table() from_engine = 6 if from_engine == 6 and to_engine > 6: # migrate from sealed properties to schema evolution settings diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 3bfd0b118e..1ee1013127 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -942,7 +942,7 @@ def _add_standard_tables(self) -> None: utils.version_table(), self.naming ) self._schema_tables[self.loads_table_name] = utils.normalize_table_identifiers( - utils.load_table(), self.naming + utils.loads_table(), self.naming ) def _add_standard_hints(self) -> None: @@ -1036,6 +1036,14 @@ def _renormalize_schema_identifiers( self.version_table_name = to_naming.normalize_table_identifier(VERSION_TABLE_NAME) self.loads_table_name = to_naming.normalize_table_identifier(LOADS_TABLE_NAME) self.state_table_name = to_naming.normalize_table_identifier(PIPELINE_STATE_TABLE_NAME) + # do a sanity check - dlt tables must start with dlt prefix + for table_name in [self.version_table_name, self.loads_table_name, self.state_table_name]: + if not table_name.startswith(self._dlt_tables_prefix): + raise SchemaCorruptedException( + self.name, + f"A naming convention {self.naming.name()} mangles _dlt table prefix to" + f" '{self._dlt_tables_prefix}'. A table '{table_name}' does not start with it.", + ) # normalize default hints if default_hints := self._settings.get("default_hints"): self._settings["default_hints"] = self._normalize_default_hints(default_hints) diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index aa2f6cc817..7730cd2b58 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -737,6 +737,7 @@ def version_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) # set to nullable so we can migrate existing tables + # WARNING: do not reorder the columns table = new_table( VERSION_TABLE_NAME, columns=[ @@ -757,10 +758,11 @@ def version_table() -> TTableSchema: return table -def load_table() -> TTableSchema: +def loads_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) # set to nullable so we can migrate existing tables + # WARNING: do not reorder the columns table = new_table( LOADS_TABLE_NAME, columns=[ @@ -784,6 +786,7 @@ def pipeline_state_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) # set to nullable so we can migrate existing tables + # WARNING: do not reorder the columns table = new_table( PIPELINE_STATE_TABLE_NAME, columns=[ From ee8a95b5f14dd4a20ba87369a0a06e1d6019f926 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:54:30 +0200 Subject: [PATCH 040/105] finds genric types in non generic classes deriving from generic --- dlt/common/typing.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 29c1b01d80..15fe3f0649 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -4,7 +4,7 @@ import os from re import Pattern as _REPattern import sys -from types import FunctionType, MethodType, ModuleType +from types import FunctionType from 
typing import ( ForwardRef, Callable, @@ -39,6 +39,7 @@ Concatenate, get_args, get_origin, + get_original_bases, ) try: @@ -362,7 +363,7 @@ def is_subclass(subclass: Any, cls: Any) -> bool: def get_generic_type_argument_from_instance( - instance: Any, sample_value: Optional[Any] + instance: Any, sample_value: Optional[Any] = None ) -> Type[Any]: """Infers type argument of a Generic class from an `instance` of that class using optional `sample_value` of the argument type @@ -376,8 +377,14 @@ def get_generic_type_argument_from_instance( Type[Any]: type argument or Any if not known """ orig_param_type = Any - if hasattr(instance, "__orig_class__"): - orig_param_type = get_args(instance.__orig_class__)[0] + # instance of class deriving from generic + if bases_ := get_original_bases(instance): + cls_ = bases_[0] + else: + # instance of generic class + cls_ = getattr(instance, "__orig_class__", None) + if cls_: + orig_param_type = get_args(cls_)[0] if orig_param_type is Any and sample_value is not None: orig_param_type = type(sample_value) return orig_param_type # type: ignore From eb30838d442954b946b266f93ee7cc780d0dac88 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:55:54 +0200 Subject: [PATCH 041/105] uses casefold INSERT VALUES job column names --- dlt/destinations/insert_job_client.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/insert_job_client.py b/dlt/destinations/insert_job_client.py index 74e14f0221..652d13f556 100644 --- a/dlt/destinations/insert_job_client.py +++ b/dlt/destinations/insert_job_client.py @@ -36,6 +36,10 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st # the procedure below will split the inserts into max_query_length // 2 packs with FileStorage.open_zipsafe_ro(file_path, "r", encoding="utf-8") as f: header = f.readline() + # format and casefold header + header = self._sql_client.capabilities.casefold_identifier(header).format( + qualified_table_name + ) writer_type = self._sql_client.capabilities.insert_values_writer_type if writer_type == "default": sep = "," @@ -70,7 +74,7 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st # Chunk by max_rows - 1 for simplicity because one more row may be added for chunk in chunks(values_rows, max_rows - 1): processed += len(chunk) - insert_sql.append(header.format(qualified_table_name)) + insert_sql.append(header) if writer_type == "default": insert_sql.append(values_mark) if processed == len_rows: @@ -82,11 +86,9 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st else: # otherwise write all content in a single INSERT INTO if writer_type == "default": - insert_sql.extend( - [header.format(qualified_table_name), values_mark, content + until_nl] - ) + insert_sql.extend([header, values_mark, content + until_nl]) elif writer_type == "select_union": - insert_sql.extend([header.format(qualified_table_name), content + until_nl]) + insert_sql.extend([header, content + until_nl]) # actually this may be empty if we were able to read a full file into content if not is_eof: From 558db91e08227e73587ecb9c028154ac90eac41e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 21:57:47 +0200 Subject: [PATCH 042/105] adds a method make_qualified_table_name_path that calculates components of fully qualified table name and uses it to query INFO SCHEMA --- dlt/destinations/sql_client.py | 58 +++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 11 
deletions(-) diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index 7c77a49972..7912ac4561 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -30,13 +30,15 @@ class SqlClientBase(ABC, Generic[TNativeConn]): dbapi: ClassVar[DBApi] = None - capabilities: ClassVar[DestinationCapabilitiesContext] = None - def __init__(self, database_name: str, dataset_name: str) -> None: + def __init__( + self, database_name: str, dataset_name: str, capabilities: DestinationCapabilitiesContext + ) -> None: if not dataset_name: raise ValueError(dataset_name) self.dataset_name = dataset_name self.database_name = database_name + self.capabilities = capabilities @abstractmethod def open_connection(self) -> TNativeConn: @@ -75,9 +77,12 @@ def has_dataset(self) -> bool: SELECT 1 FROM INFORMATION_SCHEMA.SCHEMATA WHERE """ - db_params = self.fully_qualified_dataset_name(escape=False).split(".", 2) - if len(db_params) == 2: + catalog_name, schema_name, _ = self._get_information_schema_components() + db_params: List[str] = [] + if catalog_name is not None: query += " catalog_name = %s AND " + db_params.append(catalog_name) + db_params.append(schema_name) query += "schema_name = %s" rows = self.execute_sql(query, *db_params) return len(rows) > 0 @@ -137,17 +142,36 @@ def execute_many( ret.append(result) return ret + def catalog_name(self, escape: bool = True) -> Optional[str]: + # default is no catalogue component of the name, which typically means that + # connection is scoped to a current database + return None + def fully_qualified_dataset_name(self, escape: bool = True) -> str: - dataset_name = self.capabilities.casefold_identifier(self.dataset_name) - if escape: - return self.capabilities.escape_identifier(dataset_name) - return dataset_name + return ".".join(self.make_qualified_table_name_path(None, escape=escape)) def make_qualified_table_name(self, table_name: str, escape: bool = True) -> str: - table_name = self.capabilities.casefold_identifier(table_name) + return ".".join(self.make_qualified_table_name_path(table_name, escape=escape)) + + def make_qualified_table_name_path( + self, table_name: Optional[str], escape: bool = True + ) -> List[str]: + """Returns a list with path components leading from catalog to table_name. + Used to construct fully qualified names. `table_name` is optional. + """ + path: List[str] = [] + if catalog_name := self.catalog_name(escape=escape): + path.append(catalog_name) + dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: - table_name = self.capabilities.escape_identifier(table_name) - return f"{self.fully_qualified_dataset_name(escape=escape)}.{table_name}" + dataset_name = self.capabilities.escape_identifier(dataset_name) + path.append(dataset_name) + if table_name: + table_name = self.capabilities.casefold_identifier(table_name) + if escape: + table_name = self.capabilities.escape_identifier(table_name) + path.append(table_name) + return path def escape_column_name(self, column_name: str, escape: bool = True) -> str: column_name = self.capabilities.casefold_identifier(column_name) @@ -195,6 +219,18 @@ def is_dbapi_exception(ex: Exception) -> bool: def make_staging_dataset_name(dataset_name: str) -> str: return dataset_name + "_staging" + def _get_information_schema_components(self, *tables: str) -> Tuple[str, str, List[str]]: + """Gets catalog name, schema name and name of the tables in format that can be directly + used to query INFORMATION_SCHEMA. 
catalog name is optional: in that case None is + returned in the first element of the tuple. + """ + schema_path = self.make_qualified_table_name_path(None, escape=False) + return ( + self.catalog_name(escape=False), + schema_path[-1], + [self.make_qualified_table_name_path(table, escape=False)[-1] for table in tables], + ) + # # generate sql statements # From dea96699adb7ac75cbaefa9413881c0862305f28 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 22:05:13 +0200 Subject: [PATCH 043/105] adds casing info to destinations, caps as instance attrs, custom table name paths --- dlt/destinations/impl/athena/__init__.py | 3 + dlt/destinations/impl/athena/athena.py | 31 ++++--- dlt/destinations/impl/bigquery/bigquery.py | 10 ++- dlt/destinations/impl/bigquery/sql_client.py | 10 +-- dlt/destinations/impl/clickhouse/__init__.py | 5 ++ .../impl/clickhouse/clickhouse.py | 33 +------ .../impl/clickhouse/sql_client.py | 59 ++++++++----- .../impl/databricks/sql_client.py | 3 +- .../impl/destination/destination.py | 11 ++- dlt/destinations/impl/dremio/__init__.py | 3 + dlt/destinations/impl/dremio/dremio.py | 47 ++-------- dlt/destinations/impl/dremio/sql_client.py | 34 ++++---- dlt/destinations/impl/duckdb/__init__.py | 1 + dlt/destinations/impl/duckdb/duck.py | 13 ++- dlt/destinations/impl/duckdb/sql_client.py | 10 ++- dlt/destinations/impl/dummy/dummy.py | 11 ++- .../impl/filesystem/filesystem.py | 10 ++- dlt/destinations/impl/motherduck/__init__.py | 1 + .../impl/motherduck/motherduck.py | 16 ++-- .../impl/motherduck/sql_client.py | 41 +++------ dlt/destinations/impl/mssql/__init__.py | 4 + dlt/destinations/impl/mssql/configuration.py | 1 + dlt/destinations/impl/mssql/factory.py | 29 +++++-- dlt/destinations/impl/mssql/mssql.py | 15 ++-- dlt/destinations/impl/mssql/sql_client.py | 10 ++- dlt/destinations/impl/postgres/__init__.py | 5 ++ dlt/destinations/impl/postgres/postgres.py | 13 ++- dlt/destinations/impl/postgres/sql_client.py | 10 ++- dlt/destinations/impl/qdrant/qdrant_client.py | 11 ++- dlt/destinations/impl/redshift/__init__.py | 3 + .../impl/redshift/configuration.py | 2 + dlt/destinations/impl/redshift/factory.py | 20 ++++- dlt/destinations/impl/redshift/redshift.py | 30 ++++--- dlt/destinations/impl/snowflake/__init__.py | 4 + dlt/destinations/impl/snowflake/snowflake.py | 13 ++- dlt/destinations/impl/snowflake/sql_client.py | 10 ++- dlt/destinations/impl/synapse/__init__.py | 4 + dlt/destinations/impl/synapse/factory.py | 17 ++++ dlt/destinations/impl/synapse/sql_client.py | 2 - dlt/destinations/impl/synapse/synapse.py | 25 +++--- dlt/destinations/impl/weaviate/__init__.py | 5 ++ dlt/destinations/impl/weaviate/ci_naming.py | 3 + dlt/destinations/impl/weaviate/exceptions.py | 6 +- dlt/destinations/impl/weaviate/naming.py | 3 + .../impl/weaviate/weaviate_client.py | 85 +++++++++++-------- dlt/destinations/job_client_impl.py | 77 +++++++++-------- dlt/destinations/utils.py | 4 +- 47 files changed, 438 insertions(+), 325 deletions(-) diff --git a/dlt/destinations/impl/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py index ba04312064..f971b5e90a 100644 --- a/dlt/destinations/impl/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -14,6 +14,9 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_table_formats = ["iceberg"] caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["parquet", "jsonl"] + # athena is storing all identifiers in lower case and is case insensitive + # it also uses lower case 
in all the queries + # https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html caps.escape_identifier = escape_athena_identifier caps.casefold_identifier = str.lower caps.has_case_sensitive_identifiers = False diff --git a/dlt/destinations/impl/athena/athena.py b/dlt/destinations/impl/athena/athena.py index 35eaab1bc4..1f11a27521 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -34,20 +34,17 @@ from dlt.common import logger from dlt.common.exceptions import TerminalValueError from dlt.common.utils import without_none -from dlt.common.data_types import TDataType -from dlt.common.schema import TColumnSchema, Schema, TSchemaTables, TTableSchema +from dlt.common.schema import TColumnSchema, Schema, TTableSchema from dlt.common.schema.typing import ( TTableSchema, TColumnType, - TWriteDisposition, TTableFormat, TSortOrder, ) -from dlt.common.schema.utils import table_schema_has_type, get_table_format +from dlt.common.schema.utils import table_schema_has_type from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import LoadJob, DoNothingFollowupJob, DoNothingJob -from dlt.common.destination.reference import TLoadJobState, NewLoadJob, SupportsStagingDestination -from dlt.common.storages import FileStorage +from dlt.common.destination.reference import NewLoadJob, SupportsStagingDestination from dlt.common.data_writers.escape import escape_hive_identifier from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlMergeJob @@ -58,7 +55,6 @@ DatabaseUndefinedRelation, LoadJobTerminalException, ) -from dlt.destinations.impl.athena import capabilities from dlt.destinations.sql_client import ( SqlClientBase, DBApiCursorImpl, @@ -221,11 +217,15 @@ def requires_temp_table_for_delete(cls) -> bool: class AthenaSQLClient(SqlClientBase[Connection]): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() dbapi: ClassVar[DBApi] = pyathena - def __init__(self, dataset_name: str, config: AthenaClientConfiguration) -> None: - super().__init__(None, dataset_name) + def __init__( + self, + dataset_name: str, + config: AthenaClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(None, dataset_name, capabilities) self._conn: Connection = None self.config = config self.credentials = config.credentials @@ -370,9 +370,12 @@ def has_dataset(self) -> bool: class AthenaClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: AthenaClientConfiguration) -> None: + def __init__( + self, + schema: Schema, + config: AthenaClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: # verify if staging layout is valid for Athena # this will raise if the table prefix is not properly defined # we actually that {table_name} is first, no {schema_name} is allowed @@ -382,7 +385,7 @@ def __init__(self, schema: Schema, config: AthenaClientConfiguration) -> None: table_needs_own_folder=True, ) - sql_client = AthenaSQLClient(config.normalize_dataset_name(schema), config) + sql_client = AthenaSQLClient(config.normalize_dataset_name(schema), config, capabilities) super().__init__(schema, config, sql_client) self.sql_client: AthenaSQLClient = sql_client # type: ignore self.config: AthenaClientConfiguration = config diff --git a/dlt/destinations/impl/bigquery/bigquery.py 
b/dlt/destinations/impl/bigquery/bigquery.py index 46c3ebe232..c2607202bc 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -175,12 +175,16 @@ def gen_key_table_clauses( class BigQueryClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: BigQueryClientConfiguration) -> None: + def __init__( + self, + schema: Schema, + config: BigQueryClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: sql_client = BigQuerySqlClient( config.normalize_dataset_name(schema), config.credentials, + capabilities, config.get_location(), config.http_timeout, config.retry_deadline, diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 1a48ad8362..3f79081988 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -75,12 +75,12 @@ def close(self) -> None: class BigQuerySqlClient(SqlClientBase[bigquery.Client], DBTransaction): dbapi: ClassVar[DBApi] = bq_dbapi - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__( self, dataset_name: str, credentials: GcpServiceAccountCredentialsWithoutDefaults, + capabilities: DestinationCapabilitiesContext, location: str = "US", http_timeout: float = 15.0, retry_deadline: float = 60.0, @@ -89,7 +89,7 @@ def __init__( self.credentials: GcpServiceAccountCredentialsWithoutDefaults = credentials self.location = location self.http_timeout = http_timeout - super().__init__(credentials.project_id, dataset_name) + super().__init__(credentials.project_id, dataset_name, capabilities) self._default_retry = bigquery.DEFAULT_RETRY.with_deadline(retry_deadline) self._default_query = bigquery.QueryJobConfig( @@ -235,13 +235,11 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB # will close all cursors conn.close() - def fully_qualified_dataset_name(self, escape: bool = True) -> str: + def catalog_name(self, escape: bool = True) -> Optional[str]: project_id = self.capabilities.casefold_identifier(self.credentials.project_id) - dataset_name = self.capabilities.casefold_identifier(self.dataset_name) if escape: project_id = self.capabilities.escape_identifier(project_id) - dataset_name = self.capabilities.escape_identifier(dataset_name) - return f"{project_id}.{dataset_name}" + return project_id @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: diff --git a/dlt/destinations/impl/clickhouse/__init__.py b/dlt/destinations/impl/clickhouse/__init__.py index bead136828..9594ee0b9e 100644 --- a/dlt/destinations/impl/clickhouse/__init__.py +++ b/dlt/destinations/impl/clickhouse/__init__.py @@ -21,6 +21,11 @@ def capabilities() -> DestinationCapabilitiesContext: caps.format_datetime_literal = format_clickhouse_datetime_literal caps.escape_identifier = escape_clickhouse_identifier caps.escape_literal = escape_clickhouse_literal + # docs are very unclear https://clickhouse.com/docs/en/sql-reference/syntax + # taking into account other sources: identifiers are case sensitive + caps.has_case_sensitive_identifiers = True + # and store as is in the information schema + caps.casefold_identifier = str # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified caps.max_identifier_length = 255 diff --git 
a/dlt/destinations/impl/clickhouse/clickhouse.py b/dlt/destinations/impl/clickhouse/clickhouse.py index cf1f1bc857..c556fab08e 100644 --- a/dlt/destinations/impl/clickhouse/clickhouse.py +++ b/dlt/destinations/impl/clickhouse/clickhouse.py @@ -289,15 +289,14 @@ def requires_temp_table_for_delete(cls) -> bool: class ClickHouseClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__( self, schema: Schema, config: ClickHouseClientConfiguration, + capabilities: DestinationCapabilitiesContext, ) -> None: self.sql_client: ClickHouseSqlClient = ClickHouseSqlClient( - config.normalize_dataset_name(schema), config.credentials + config.normalize_dataset_name(schema), config.credentials, capabilities ) super().__init__(schema, config, self.sql_client) self.config: ClickHouseClientConfiguration = config @@ -367,34 +366,6 @@ def _get_table_update_sql( return sql - def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - fields = self._get_storage_table_query_columns() - db_params = self.sql_client.make_qualified_table_name(table_name, escape=False).split( - ".", 3 - ) - query = f'SELECT {",".join(fields)} FROM INFORMATION_SCHEMA.COLUMNS WHERE ' - if len(db_params) == 3: - query += "table_catalog = %s AND " - query += "table_schema = %s AND table_name = %s ORDER BY ordinal_position;" - rows = self.sql_client.execute_sql(query, *db_params) - - # If no rows we assume that table does not exist. - schema_table: TTableSchemaColumns = {} - if len(rows) == 0: - return False, schema_table - for c in rows: - numeric_precision = ( - c[3] if self.capabilities.schema_supports_numeric_precision else None - ) - numeric_scale = c[4] if self.capabilities.schema_supports_numeric_precision else None - schema_c: TColumnSchemaBase = { - "name": c[0], - "nullable": bool(c[2]), - **self._from_db_type(c[1], numeric_precision, numeric_scale), - } - schema_table[c[0]] = schema_c # type: ignore - return True, schema_table - @staticmethod def _gen_not_null(v: bool) -> str: # ClickHouse fields are not nullable by default. 
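
The ClickHouse-specific get_storage_table override is removed above because column reflection now goes through the generic INFORMATION_SCHEMA query in the base job client, which builds identifiers via make_qualified_table_name_path and _get_information_schema_components. A minimal standalone sketch of how those two pieces fit together for a ClickHouse-style client follows; this is not dlt's actual class, and the separator value is only an assumption for illustration:

from typing import List, Optional, Tuple


class ClickHouseStylePaths:
    """Simplified sketch of the catalog/dataset/table path logic in this patch."""

    def __init__(self, database: str, dataset_name: str, separator: str = "___") -> None:
        self.database = database          # acts as the catalog
        self.dataset_name = dataset_name  # logical dataset, folded into table names
        self.separator = separator        # assumed separator value, configurable in dlt

    def catalog_name(self) -> Optional[str]:
        return self.database

    def make_qualified_table_name_path(self, table_name: Optional[str]) -> List[str]:
        # base behavior yields [catalog, dataset]; a ClickHouse-style client replaces
        # the dataset component with "<dataset><separator><table>" since there are no schemas
        path = [self.catalog_name(), self.dataset_name]
        if table_name:
            path[1] = f"{self.dataset_name}{self.separator}{table_name}"
        return path

    def information_schema_components(self, *tables: str) -> Tuple[Optional[str], str, List[str]]:
        # ClickHouse exposes the database in the schema slot of INFORMATION_SCHEMA,
        # so the catalog component is dropped and prefixed table names are returned
        return (
            None,
            self.catalog_name(),
            [self.make_qualified_table_name_path(t)[-1] for t in tables],
        )


if __name__ == "__main__":
    paths = ClickHouseStylePaths("analytics", "my_dataset")
    print(paths.make_qualified_table_name_path("events"))
    # ['analytics', 'my_dataset___events']
    print(paths.information_schema_components("events", "users"))
    # (None, 'analytics', ['my_dataset___events', 'my_dataset___users'])
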
diff --git a/dlt/destinations/impl/clickhouse/sql_client.py b/dlt/destinations/impl/clickhouse/sql_client.py index 8fb89c90cd..2083b17c7c 100644 --- a/dlt/destinations/impl/clickhouse/sql_client.py +++ b/dlt/destinations/impl/clickhouse/sql_client.py @@ -7,6 +7,7 @@ Optional, Sequence, ClassVar, + Tuple, ) import clickhouse_driver # type: ignore[import-untyped] @@ -45,15 +46,20 @@ class ClickHouseSqlClient( SqlClientBase[clickhouse_driver.dbapi.connection.Connection], DBTransaction ): dbapi: ClassVar[DBApi] = clickhouse_driver.dbapi - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: ClickHouseCredentials) -> None: - super().__init__(credentials.database, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: ClickHouseCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.database, dataset_name, capabilities) self._conn: clickhouse_driver.dbapi.connection = None self.credentials = credentials self.database_name = credentials.database def has_dataset(self) -> bool: + # we do not need to normalize dataset_sentinel_table_name sentinel_table = self.credentials.dataset_sentinel_table_name return sentinel_table in [ t.split(self.credentials.dataset_table_separator)[1] for t in self._list_tables() @@ -110,10 +116,11 @@ def drop_dataset(self) -> None: # This is because the driver incorrectly substitutes the entire query string, causing the "DROP TABLE" keyword to be omitted. # To resolve this, we are forced to provide the full query string here. self.execute_sql( - f"""DROP TABLE {self.capabilities.escape_identifier(self.database_name)}.{self.capabilities.escape_identifier(table)} SYNC""" + f"""DROP TABLE {self.catalog_name()}.{self.capabilities.escape_identifier(table)} SYNC""" ) def _list_tables(self) -> List[str]: + catalog_name, table_name = self.make_qualified_table_name_path("%", escape=False) rows = self.execute_sql( """ SELECT name @@ -121,10 +128,8 @@ def _list_tables(self) -> List[str]: WHERE database = %s AND name LIKE %s """, - ( - self.database_name, - f"{self.dataset_name}{self.credentials.dataset_table_separator}%", - ), + catalog_name, + table_name, ) return [row[0] for row in rows] @@ -151,21 +156,33 @@ def execute_query( yield ClickHouseDBApiCursorImpl(cursor) # type: ignore[abstract] - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - database_name = self.database_name - dataset_name = self.dataset_name - if escape: - database_name = self.capabilities.escape_identifier(database_name) - dataset_name = self.capabilities.escape_identifier(dataset_name) - return f"{database_name}.{dataset_name}" - - def make_qualified_table_name(self, table_name: str, escape: bool = True) -> str: - database_name = self.database_name - table_name = f"{self.dataset_name}{self.credentials.dataset_table_separator}{table_name}" + def catalog_name(self, escape: bool = True) -> Optional[str]: + database_name = self.capabilities.casefold_identifier(self.database_name) if escape: database_name = self.capabilities.escape_identifier(database_name) - table_name = self.capabilities.escape_identifier(table_name) - return f"{database_name}.{table_name}" + return database_name + + def make_qualified_table_name_path( + self, table_name: Optional[str], escape: bool = True + ) -> List[str]: + # get catalog and dataset + path = super().make_qualified_table_name_path(None, escape=escape) + if table_name: + # table name combines dataset name and table name + 
table_name = self.capabilities.casefold_identifier( + f"{self.dataset_name}{self.credentials.dataset_table_separator}{table_name}" + ) + if escape: + table_name = self.capabilities.escape_identifier(table_name) + # we have only two path components + path[1] = table_name + return path + + def _get_information_schema_components(self, *tables: str) -> Tuple[str, str, List[str]]: + components = super()._get_information_schema_components(*tables) + # clickhouse has a catalogue and no schema but uses catalogue as a schema to query the information schema 🤷 + # so we must disable catalogue search. also note that table name is prefixed with logical "dataset_name" + return (None, components[0], components[2]) @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: diff --git a/dlt/destinations/impl/databricks/sql_client.py b/dlt/destinations/impl/databricks/sql_client.py index 6337f071ee..da91402803 100644 --- a/dlt/destinations/impl/databricks/sql_client.py +++ b/dlt/destinations/impl/databricks/sql_client.py @@ -23,13 +23,12 @@ ) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame from dlt.destinations.impl.databricks.configuration import DatabricksCredentials -from dlt.destinations.impl.databricks import capabilities class DatabricksCursorImpl(DBApiCursorImpl): """Use native data frame support if available""" - native_cursor: DatabricksSqlCursor + native_cursor: DatabricksSqlCursor # type: ignore[assignment] vector_size: ClassVar[int] = 2048 def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: diff --git a/dlt/destinations/impl/destination/destination.py b/dlt/destinations/impl/destination/destination.py index 69d1d1d98a..a2fe717d73 100644 --- a/dlt/destinations/impl/destination/destination.py +++ b/dlt/destinations/impl/destination/destination.py @@ -27,10 +27,13 @@ class DestinationClient(JobClientBase): """Sink Client""" - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: CustomDestinationClientConfiguration) -> None: - super().__init__(schema, config) + def __init__( + self, + schema: Schema, + config: CustomDestinationClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) self.config: CustomDestinationClientConfiguration = config # create pre-resolved callable to avoid multiple config resolutions during execution of the jobs self.destination_callable = create_resolved_partial( diff --git a/dlt/destinations/impl/dremio/__init__.py b/dlt/destinations/impl/dremio/__init__.py index b4bde2fe6d..96d4748f1d 100644 --- a/dlt/destinations/impl/dremio/__init__.py +++ b/dlt/destinations/impl/dremio/__init__.py @@ -10,6 +10,9 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["jsonl", "parquet"] caps.escape_identifier = escape_dremio_identifier + # all identifiers are case insensitive but are stored as is + # https://docs.dremio.com/current/sonar/data-sources + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) caps.max_identifier_length = 255 diff --git a/dlt/destinations/impl/dremio/dremio.py b/dlt/destinations/impl/dremio/dremio.py index 23bca0ad74..1552bd5b3e 100644 --- a/dlt/destinations/impl/dremio/dremio.py +++ b/dlt/destinations/impl/dremio/dremio.py @@ -137,10 +137,15 @@ def 
exception(self) -> str: class DremioClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: DremioClientConfiguration) -> None: - sql_client = DremioSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: DremioClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = DremioSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: DremioClientConfiguration = config self.sql_client: DremioSqlClient = sql_client # type: ignore @@ -198,40 +203,6 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non f"{name} {self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" ) - def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - def _null_to_bool(v: str) -> bool: - if v == "NO": - return False - elif v == "YES": - return True - raise ValueError(v) - - fields = self._get_storage_table_query_columns() - table_schema = self.sql_client.fully_qualified_dataset_name(escape=False) - db_params = (table_schema, table_name) - query = f""" -SELECT {",".join(fields)} - FROM INFORMATION_SCHEMA.COLUMNS -WHERE - table_catalog = 'DREMIO' AND table_schema = %s AND table_name = %s ORDER BY ordinal_position; -""" - rows = self.sql_client.execute_sql(query, *db_params) - - # if no rows we assume that table does not exist - schema_table: TTableSchemaColumns = {} - if len(rows) == 0: - return False, schema_table - for c in rows: - numeric_precision = c[3] - numeric_scale = c[4] - schema_c: TColumnSchemaBase = { - "name": c[0], - "nullable": _null_to_bool(c[2]), - **self._from_db_type(c[1], numeric_precision, numeric_scale), - } - schema_table[c[0]] = schema_c # type: ignore - return True, schema_table - def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: return [DremioMergeJob.from_table_chain(table_chain, self.sql_client)] diff --git a/dlt/destinations/impl/dremio/sql_client.py b/dlt/destinations/impl/dremio/sql_client.py index 255c8acee0..1f17045c0b 100644 --- a/dlt/destinations/impl/dremio/sql_client.py +++ b/dlt/destinations/impl/dremio/sql_client.py @@ -1,5 +1,5 @@ from contextlib import contextmanager, suppress -from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence, List +from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence, List, Tuple import pyarrow @@ -32,10 +32,14 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: class DremioSqlClient(SqlClientBase[pydremio.DremioConnection]): dbapi: ClassVar[DBApi] = pydremio - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: DremioCredentials) -> None: - super().__init__(credentials.database, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: DremioCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.database, dataset_name, capabilities) self._conn: Optional[pydremio.DremioConnection] = None self.credentials = credentials @@ -99,18 +103,16 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB raise DatabaseTransientException(ex) yield DremioCursorImpl(curr) # type: ignore - def 
fully_qualified_dataset_name(self, escape: bool = True) -> str: - database_name = self.credentials.database - dataset_name = self.dataset_name + def catalog_name(self, escape: bool = True) -> Optional[str]: + database_name = self.capabilities.casefold_identifier(self.database_name) if escape: database_name = self.capabilities.escape_identifier(database_name) - dataset_name = self.capabilities.escape_identifier(dataset_name) - return f"{database_name}.{dataset_name}" + return database_name - def make_qualified_table_name(self, table_name: str, escape: bool = True) -> str: - if escape: - table_name = self.capabilities.escape_identifier(table_name) - return f"{self.fully_qualified_dataset_name(escape=escape)}.{table_name}" + def _get_information_schema_components(self, *tables: str) -> Tuple[str, str, List[str]]: + components = super()._get_information_schema_components(*tables) + # catalog is always DREMIO but schema contains "database" prefix 🤷 + return ("DREMIO", self.fully_qualified_dataset_name(escape=False), components[2]) @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: @@ -138,10 +140,10 @@ def _get_table_names(self) -> List[str]: query = """ SELECT TABLE_NAME FROM INFORMATION_SCHEMA."TABLES" - WHERE TABLE_CATALOG = 'DREMIO' AND TABLE_SCHEMA = %s + WHERE TABLE_CATALOG = %s AND TABLE_SCHEMA = %s """ - db_params = [self.fully_qualified_dataset_name(escape=False)] - tables = self.execute_sql(query, *db_params) or [] + catalog_name, schema_name, _ = self._get_information_schema_components() + tables = self.execute_sql(query, catalog_name, schema_name) or [] return [table[0] for table in tables] def drop_dataset(self) -> None: diff --git a/dlt/destinations/impl/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py index d127523707..8523735a09 100644 --- a/dlt/destinations/impl/duckdb/__init__.py +++ b/dlt/destinations/impl/duckdb/__init__.py @@ -10,6 +10,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] caps.escape_identifier = escape_postgres_identifier + # all identifiers are case insensitive but are stored as is caps.escape_literal = escape_duckdb_literal caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) diff --git a/dlt/destinations/impl/duckdb/duck.py b/dlt/destinations/impl/duckdb/duck.py index 91bc318bc0..cac21ddbc6 100644 --- a/dlt/destinations/impl/duckdb/duck.py +++ b/dlt/destinations/impl/duckdb/duck.py @@ -151,10 +151,15 @@ def exception(self) -> str: class DuckDbClient(InsertValuesJobClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: DuckDbClientConfiguration) -> None: - sql_client = DuckDbSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: DuckDbClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = DuckDbSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: DuckDbClientConfiguration = config self.sql_client: DuckDbSqlClient = sql_client # type: ignore diff --git a/dlt/destinations/impl/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py index bd62799765..fb19f0d947 100644 --- a/dlt/destinations/impl/duckdb/sql_client.py +++ b/dlt/destinations/impl/duckdb/sql_client.py @@ -43,10 +43,14 @@ 
def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: class DuckDbSqlClient(SqlClientBase[duckdb.DuckDBPyConnection], DBTransaction): dbapi: ClassVar[DBApi] = duckdb - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: DuckDbBaseCredentials) -> None: - super().__init__(None, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: DuckDbBaseCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(None, dataset_name, capabilities) self._conn: duckdb.DuckDBPyConnection = None self.credentials = credentials diff --git a/dlt/destinations/impl/dummy/dummy.py b/dlt/destinations/impl/dummy/dummy.py index 3c78493b57..d0e2aa2885 100644 --- a/dlt/destinations/impl/dummy/dummy.py +++ b/dlt/destinations/impl/dummy/dummy.py @@ -110,10 +110,13 @@ def create_followup_jobs(self, final_state: TLoadJobState) -> List[NewLoadJob]: class DummyClient(JobClientBase, SupportsStagingDestination, WithStagingDataset): """dummy client storing jobs in memory""" - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: DummyClientConfiguration) -> None: - super().__init__(schema, config) + def __init__( + self, + schema: Schema, + config: DummyClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) self.in_staging_context = False self.config: DummyClientConfiguration = config diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 9d15ba959e..9f2af7ab30 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -153,15 +153,19 @@ def create_followup_jobs(self, final_state: TLoadJobState) -> List[NewLoadJob]: class FilesystemClient(FSClientBase, JobClientBase, WithStagingDataset, WithStateSync): """filesystem client storing jobs in memory""" - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() fs_client: AbstractFileSystem # a path (without the scheme) to a location in the bucket where dataset is present bucket_path: str # name of the dataset dataset_name: str - def __init__(self, schema: Schema, config: FilesystemDestinationClientConfiguration) -> None: - super().__init__(schema, config) + def __init__( + self, + schema: Schema, + config: FilesystemDestinationClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) self.fs_client, fs_path = fsspec_from_config(config) self.is_local_filesystem = config.protocol == "file" self.bucket_path = ( diff --git a/dlt/destinations/impl/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py index bfcb9cba14..849c56d10b 100644 --- a/dlt/destinations/impl/motherduck/__init__.py +++ b/dlt/destinations/impl/motherduck/__init__.py @@ -8,6 +8,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_loader_file_format = "parquet" caps.supported_loader_file_formats = ["parquet", "insert_values", "jsonl"] caps.escape_identifier = escape_postgres_identifier + # all identifiers are case insensitive but are stored as is caps.escape_literal = escape_duckdb_literal caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) diff --git a/dlt/destinations/impl/motherduck/motherduck.py 
b/dlt/destinations/impl/motherduck/motherduck.py index c695d9715e..8e361e4ac1 100644 --- a/dlt/destinations/impl/motherduck/motherduck.py +++ b/dlt/destinations/impl/motherduck/motherduck.py @@ -5,16 +5,20 @@ from dlt.destinations.impl.duckdb.duck import DuckDbClient -from dlt.destinations.impl.motherduck import capabilities from dlt.destinations.impl.motherduck.sql_client import MotherDuckSqlClient from dlt.destinations.impl.motherduck.configuration import MotherDuckClientConfiguration class MotherDuckClient(DuckDbClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: MotherDuckClientConfiguration) -> None: - super().__init__(schema, config) # type: ignore - sql_client = MotherDuckSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: MotherDuckClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) # type: ignore + sql_client = MotherDuckSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) self.config: MotherDuckClientConfiguration = config # type: ignore self.sql_client: MotherDuckSqlClient = sql_client diff --git a/dlt/destinations/impl/motherduck/sql_client.py b/dlt/destinations/impl/motherduck/sql_client.py index 677fa16098..40157406ab 100644 --- a/dlt/destinations/impl/motherduck/sql_client.py +++ b/dlt/destinations/impl/motherduck/sql_client.py @@ -1,37 +1,22 @@ -import duckdb +from typing import Optional -from contextlib import contextmanager -from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence -from dlt.common.destination import DestinationCapabilitiesContext - -from dlt.destinations.exceptions import ( - DatabaseTerminalException, - DatabaseTransientException, - DatabaseUndefinedRelation, -) -from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame -from dlt.destinations.sql_client import ( - SqlClientBase, - DBApiCursorImpl, - raise_database_error, - raise_open_connection_error, -) - -from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient, DuckDBDBApiCursorImpl -from dlt.destinations.impl.motherduck import capabilities +from dlt.common.destination.capabilities import DestinationCapabilitiesContext +from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials class MotherDuckSqlClient(DuckDbSqlClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, dataset_name: str, credentials: MotherDuckCredentials) -> None: - super().__init__(dataset_name, credentials) + def __init__( + self, + dataset_name: str, + credentials: MotherDuckCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(dataset_name, credentials, capabilities) self.database_name = credentials.database - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - dataset_name = super().fully_qualified_dataset_name(escape) - database_name = self.capabilities.casefold_identifier(self.database_name) + def catalog_name(self, escape: bool = True) -> Optional[str]: + database_name = self.database_name if escape: database_name = self.capabilities.escape_identifier(database_name) - return f"{database_name}.{dataset_name}" + return database_name diff --git a/dlt/destinations/impl/mssql/__init__.py b/dlt/destinations/impl/mssql/__init__.py index 
f7768d9238..8b874af2f6 100644 --- a/dlt/destinations/impl/mssql/__init__.py +++ b/dlt/destinations/impl/mssql/__init__.py @@ -10,8 +10,12 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["insert_values"] caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] + # mssql is by default case insensitive and stores identifiers as is + # case sensitivity can be changed by database collation so we allow to reconfigure + # capabilities in the mssql factory caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_mssql_literal + caps.has_case_sensitive_identifiers = False caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) # https://learn.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server?view=sql-server-ver16&redirectedfrom=MSDN diff --git a/dlt/destinations/impl/mssql/configuration.py b/dlt/destinations/impl/mssql/configuration.py index 1d085f40c1..cc74e7b268 100644 --- a/dlt/destinations/impl/mssql/configuration.py +++ b/dlt/destinations/impl/mssql/configuration.py @@ -95,6 +95,7 @@ class MsSqlClientConfiguration(DestinationClientDwhWithStagingConfiguration): credentials: MsSqlCredentials = None create_indexes: bool = False + has_case_sensitive_identifiers: bool = False def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index 2e19d7c2a8..d935a10176 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -2,29 +2,31 @@ from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.normalizers.naming.naming import NamingConvention from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration from dlt.destinations.impl.mssql import capabilities if t.TYPE_CHECKING: - from dlt.destinations.impl.mssql.mssql import MsSqlClient + from dlt.destinations.impl.mssql.mssql import MsSqlJobClient -class mssql(Destination[MsSqlClientConfiguration, "MsSqlClient"]): +class mssql(Destination[MsSqlClientConfiguration, "MsSqlJobClient"]): spec = MsSqlClientConfiguration def capabilities(self) -> DestinationCapabilitiesContext: return capabilities() @property - def client_class(self) -> t.Type["MsSqlClient"]: - from dlt.destinations.impl.mssql.mssql import MsSqlClient + def client_class(self) -> t.Type["MsSqlJobClient"]: + from dlt.destinations.impl.mssql.mssql import MsSqlJobClient - return MsSqlClient + return MsSqlJobClient def __init__( self, credentials: t.Union[MsSqlCredentials, t.Dict[str, t.Any], str] = None, - create_indexes: bool = True, + create_indexes: bool = False, + has_case_sensitive_identifiers: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -37,12 +39,27 @@ def __init__( credentials: Credentials to connect to the mssql database. 
Can be an instance of `MsSqlCredentials` or a connection string in the format `mssql://user:password@host:port/database` create_indexes: Should unique indexes be created + has_case_sensitive_identifiers: Are identifiers used by mssql database case sensitive (following the collation) **kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, create_indexes=create_indexes, + has_case_sensitive_identifiers=has_case_sensitive_identifiers, destination_name=destination_name, environment=environment, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: MsSqlClientConfiguration, + naming: NamingConvention, + ) -> DestinationCapabilitiesContext: + # modify the caps if case sensitive identifiers are requested + if config.has_case_sensitive_identifiers: + caps.has_case_sensitive_identifiers = True + caps.casefold_identifier = str + return super().adjust_capabilities(caps, config, naming) diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index 6f364c8af1..c651c3eea0 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -145,11 +145,16 @@ def _new_temp_table_name(cls, name_prefix: str, sql_client: SqlClientBase[Any]) return "#" + name -class MsSqlClient(InsertValuesJobClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: MsSqlClientConfiguration) -> None: - sql_client = PyOdbcMsSqlClient(config.normalize_dataset_name(schema), config.credentials) +class MsSqlJobClient(InsertValuesJobClient): + def __init__( + self, + schema: Schema, + config: MsSqlClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = PyOdbcMsSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: MsSqlClientConfiguration = config self.sql_client = sql_client diff --git a/dlt/destinations/impl/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py index b43f324919..1c6f82cdbf 100644 --- a/dlt/destinations/impl/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -43,10 +43,14 @@ def handle_datetimeoffset(dto_value: bytes) -> datetime: class PyOdbcMsSqlClient(SqlClientBase[pyodbc.Connection], DBTransaction): dbapi: ClassVar[DBApi] = pyodbc - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: MsSqlCredentials) -> None: - super().__init__(credentials.database, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: MsSqlCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.database, dataset_name, capabilities) self._conn: pyodbc.Connection = None self.credentials = credentials diff --git a/dlt/destinations/impl/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py index c69c2c5dec..76ee2b5cff 100644 --- a/dlt/destinations/impl/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -12,6 +12,11 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] caps.escape_identifier = escape_postgres_identifier + # postgres has case sensitive identifiers but by default + # it folds them to lower case which makes them case insensitive + # 
https://stackoverflow.com/questions/20878932/are-postgresql-column-names-case-sensitive + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = True caps.escape_literal = escape_postgres_literal caps.has_case_sensitive_identifiers = True caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) diff --git a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index 791c7005ef..089365bbef 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -135,10 +135,15 @@ def exception(self) -> str: class PostgresClient(InsertValuesJobClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: PostgresClientConfiguration) -> None: - sql_client = Psycopg2SqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: PostgresClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = Psycopg2SqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: PostgresClientConfiguration = config self.sql_client: Psycopg2SqlClient = sql_client diff --git a/dlt/destinations/impl/postgres/sql_client.py b/dlt/destinations/impl/postgres/sql_client.py index a012780f08..8bd1a9cfa5 100644 --- a/dlt/destinations/impl/postgres/sql_client.py +++ b/dlt/destinations/impl/postgres/sql_client.py @@ -31,10 +31,14 @@ class Psycopg2SqlClient(SqlClientBase["psycopg2.connection"], DBTransaction): dbapi: ClassVar[DBApi] = psycopg2 - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: PostgresCredentials) -> None: - super().__init__(credentials.database, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: PostgresCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.database, dataset_name, capabilities) self._conn: psycopg2.connection = None self.credentials = credentials diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index 4aa9ceb516..83fdb8001b 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -146,10 +146,13 @@ def exception(self) -> str: class QdrantClient(JobClientBase, WithStateSync): """Qdrant Destination Handler""" - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: QdrantClientConfiguration) -> None: - super().__init__(schema, config) + def __init__( + self, + schema: Schema, + config: QdrantClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) self.version_collection_properties = list( schema.get_table_columns(schema.version_table_name).keys() ) diff --git a/dlt/destinations/impl/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py index 5899fa2e12..f79ce7fb66 100644 --- a/dlt/destinations/impl/redshift/__init__.py +++ b/dlt/destinations/impl/redshift/__init__.py @@ -9,6 +9,9 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["insert_values"] caps.preferred_staging_file_format = "jsonl" caps.supported_staging_file_formats = ["jsonl", "parquet"] + # redshift is case insensitive and will 
lower case identifiers when stored + # you can enable case sensitivity https://docs.aws.amazon.com/redshift/latest/dg/r_enable_case_sensitive_identifier.html + # then redshift behaves like postgres caps.escape_identifier = escape_redshift_identifier caps.escape_literal = escape_redshift_literal caps.casefold_identifier = str.lower diff --git a/dlt/destinations/impl/redshift/configuration.py b/dlt/destinations/impl/redshift/configuration.py index 72d7f70a9f..3b84c8663e 100644 --- a/dlt/destinations/impl/redshift/configuration.py +++ b/dlt/destinations/impl/redshift/configuration.py @@ -23,7 +23,9 @@ class RedshiftCredentials(PostgresCredentials): class RedshiftClientConfiguration(PostgresClientConfiguration): destination_type: Final[str] = dataclasses.field(default="redshift", init=False, repr=False, compare=False) # type: ignore credentials: RedshiftCredentials = None + staging_iam_role: Optional[str] = None + has_case_sensitive_identifiers: bool = False def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py index d80ef9dcad..ef9c3d0ad3 100644 --- a/dlt/destinations/impl/redshift/factory.py +++ b/dlt/destinations/impl/redshift/factory.py @@ -2,6 +2,7 @@ from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.normalizers.naming import NamingConvention from dlt.destinations.impl.redshift.configuration import ( RedshiftCredentials, RedshiftClientConfiguration, @@ -27,8 +28,8 @@ def client_class(self) -> t.Type["RedshiftClient"]: def __init__( self, credentials: t.Union[RedshiftCredentials, t.Dict[str, t.Any], str] = None, - create_indexes: bool = True, staging_iam_role: t.Optional[str] = None, + has_case_sensitive_identifiers: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -40,15 +41,28 @@ def __init__( Args: credentials: Credentials to connect to the redshift database. 
Can be an instance of `RedshiftCredentials` or a connection string in the format `redshift://user:password@host:port/database` - create_indexes: Should unique indexes be created staging_iam_role: IAM role to use for staging data in S3 + has_case_sensitive_identifiers: Are case sensitive identifiers enabled for a database **kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, - create_indexes=create_indexes, staging_iam_role=staging_iam_role, + has_case_sensitive_identifiers=has_case_sensitive_identifiers, destination_name=destination_name, environment=environment, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: RedshiftClientConfiguration, + naming: NamingConvention, + ) -> DestinationCapabilitiesContext: + # modify the caps if case sensitive identifiers are requested + if config.has_case_sensitive_identifiers: + caps.has_case_sensitive_identifiers = True + caps.casefold_identifier = str + return super().adjust_capabilities(caps, config, naming) diff --git a/dlt/destinations/impl/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py index 7d32f4621d..a753a22166 100644 --- a/dlt/destinations/impl/redshift/redshift.py +++ b/dlt/destinations/impl/redshift/redshift.py @@ -1,11 +1,6 @@ import platform import os -from dlt.common.exceptions import TerminalValueError -from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient - -from dlt.common.schema.utils import table_schema_has_type, table_schema_has_type_with_precision - if platform.python_implementation() == "PyPy": import psycopg2cffi as psycopg2 @@ -15,16 +10,19 @@ # from psycopg2.sql import SQL, Composed -from typing import ClassVar, Dict, List, Optional, Sequence, Any, Tuple +from typing import Dict, List, Optional, Sequence, Any, Tuple + -from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( NewLoadJob, CredentialsConfiguration, SupportsStagingDestination, ) from dlt.common.data_types import TDataType +from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.schema import TColumnSchema, TColumnHint, Schema +from dlt.common.exceptions import TerminalValueError +from dlt.common.schema.utils import table_schema_has_type, table_schema_has_type_with_precision from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat, TTableSchemaColumns from dlt.common.configuration.specs import AwsCredentialsWithoutDefaults @@ -32,7 +30,8 @@ from dlt.destinations.sql_jobs import SqlMergeJob from dlt.destinations.exceptions import DatabaseTerminalException, LoadJobTerminalException from dlt.destinations.job_client_impl import CopyRemoteFileLoadJob, LoadJob - +from dlt.destinations.impl.postgres.configuration import PostgresCredentials +from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient from dlt.destinations.impl.redshift import capabilities from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration from dlt.destinations.job_impl import NewReferenceJob @@ -109,8 +108,6 @@ def from_db_type( class RedshiftSqlClient(Psycopg2SqlClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - @staticmethod def _maybe_make_terminal_exception_from_data_error( pg_ex: psycopg2.DataError, @@ -231,10 +228,15 @@ def gen_key_table_clauses( class RedshiftClient(InsertValuesJobClient, SupportsStagingDestination): - capabilities: 
ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: RedshiftClientConfiguration) -> None: - sql_client = RedshiftSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: RedshiftClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = RedshiftSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.sql_client = sql_client self.config: RedshiftClientConfiguration = config diff --git a/dlt/destinations/impl/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py index 4de37762a6..f5c1bec314 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -9,7 +9,11 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["jsonl", "parquet"] caps.preferred_staging_file_format = "jsonl" caps.supported_staging_file_formats = ["jsonl", "parquet"] + # snowflake is case sensitive but all unquoted identifiers are upper cased + # so upper case identifiers are considered case insensitive caps.escape_identifier = escape_snowflake_identifier + # dlt is configured to create case insensitive identifiers + # note that case sensitive naming conventions will change this setting to "str" (case sensitive) caps.casefold_identifier = str.upper caps.has_case_sensitive_identifiers = True caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index d4fd032750..d8650d33c5 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -193,10 +193,15 @@ def exception(self) -> str: class SnowflakeClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: SnowflakeClientConfiguration) -> None: - sql_client = SnowflakeSqlClient(config.normalize_dataset_name(schema), config.credentials) + def __init__( + self, + schema: Schema, + config: SnowflakeClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + sql_client = SnowflakeSqlClient( + config.normalize_dataset_name(schema), config.credentials, capabilities + ) super().__init__(schema, config, sql_client) self.config: SnowflakeClientConfiguration = config self.sql_client: SnowflakeSqlClient = sql_client # type: ignore diff --git a/dlt/destinations/impl/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py index 4920c20cb9..23b9f65052 100644 --- a/dlt/destinations/impl/snowflake/sql_client.py +++ b/dlt/destinations/impl/snowflake/sql_client.py @@ -31,10 +31,14 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: class SnowflakeSqlClient(SqlClientBase[snowflake_lib.SnowflakeConnection], DBTransaction): dbapi: ClassVar[DBApi] = snowflake_lib - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def __init__(self, dataset_name: str, credentials: SnowflakeCredentials) -> None: - super().__init__(credentials.database, dataset_name) + def __init__( + self, + dataset_name: str, + credentials: SnowflakeCredentials, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(credentials.database, dataset_name, capabilities) self._conn: 
snowflake_lib.SnowflakeConnection = None self.credentials = credentials diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py index cf2d530eb5..6c695e7089 100644 --- a/dlt/destinations/impl/synapse/__init__.py +++ b/dlt/destinations/impl/synapse/__init__.py @@ -16,8 +16,12 @@ def capabilities() -> DestinationCapabilitiesContext: caps.insert_values_writer_type = "select_union" # https://stackoverflow.com/a/77014299 + # similarly to mssql case sensitivity depends on database collation + # https://learn.microsoft.com/en-us/sql/relational-databases/collations/collation-and-unicode-support?view=sql-server-ver16#collations-in-azure-sql-database + # note that special option CATALOG_COLLATION is used to change it caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_mssql_literal + # we allow to reconfigure capabilities in the mssql factory caps.has_case_sensitive_identifiers = False # Synapse has a max precision of 38 diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index 100878ae05..41fb248056 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -1,6 +1,7 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.normalizers.naming import NamingConvention from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.configuration import ( @@ -36,6 +37,7 @@ def __init__( default_table_index_type: t.Optional[TTableIndexType] = "heap", create_indexes: bool = False, staging_use_msi: bool = False, + has_case_sensitive_identifiers: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -50,6 +52,7 @@ def __init__( default_table_index_type: Maps directly to the default_table_index_type attribute of the SynapseClientConfiguration object. create_indexes: Maps directly to the create_indexes attribute of the SynapseClientConfiguration object. staging_use_msi: Maps directly to the staging_use_msi attribute of the SynapseClientConfiguration object. 
+ has_case_sensitive_identifiers: Are identifiers used by synapse database case sensitive (following the catalog collation) **kwargs: Additional arguments passed to the destination config """ super().__init__( @@ -57,7 +60,21 @@ def __init__( default_table_index_type=default_table_index_type, create_indexes=create_indexes, staging_use_msi=staging_use_msi, + has_case_sensitive_identifiers=has_case_sensitive_identifiers, destination_name=destination_name, environment=environment, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: SynapseClientConfiguration, + naming: NamingConvention, + ) -> DestinationCapabilitiesContext: + # modify the caps if case sensitive identifiers are requested + if config.has_case_sensitive_identifiers: + caps.has_case_sensitive_identifiers = True + caps.casefold_identifier = str + return super().adjust_capabilities(caps, config, naming) diff --git a/dlt/destinations/impl/synapse/sql_client.py b/dlt/destinations/impl/synapse/sql_client.py index 089c58e57c..05ceee0356 100644 --- a/dlt/destinations/impl/synapse/sql_client.py +++ b/dlt/destinations/impl/synapse/sql_client.py @@ -12,8 +12,6 @@ class SynapseSqlClient(PyOdbcMsSqlClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - def drop_tables(self, *tables: str) -> None: if not tables: return diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index f52b64b9d9..6e8f9ee3cd 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -1,12 +1,10 @@ import os -from typing import ClassVar, Sequence, List, Dict, Any, Optional, cast +from typing import Sequence, List, Dict, Any, Optional, cast from copy import deepcopy from textwrap import dedent from urllib.parse import urlparse, urlunparse -from dlt import current - -from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.destination.reference import ( SupportsStagingDestination, NewLoadJob, @@ -22,19 +20,17 @@ from dlt.common.configuration.specs import AzureCredentialsWithoutDefaults from dlt.destinations.job_impl import NewReferenceJob -from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.job_client_impl import SqlJobClientBase, LoadJob, CopyRemoteFileLoadJob from dlt.destinations.exceptions import LoadJobTerminalException from dlt.destinations.impl.mssql.mssql import ( MsSqlTypeMapper, - MsSqlClient, + MsSqlJobClient, VARCHAR_MAX_N, VARBINARY_MAX_N, ) -from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient from dlt.destinations.impl.synapse.configuration import SynapseClientConfiguration from dlt.destinations.impl.synapse.synapse_adapter import ( @@ -53,14 +49,17 @@ } -class SynapseClient(MsSqlClient, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: - super().__init__(schema, config) +class SynapseClient(MsSqlJobClient, SupportsStagingDestination): + def __init__( + self, + schema: Schema, + config: SynapseClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) self.config: SynapseClientConfiguration = config 
self.sql_client = SynapseSqlClient( - config.normalize_dataset_name(schema), config.credentials + config.normalize_dataset_name(schema), config.credentials, capabilities ) self.active_hints = deepcopy(HINT_TO_SYNAPSE_ATTR) diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py index 6ffea5ae76..eaf4ca56f3 100644 --- a/dlt/destinations/impl/weaviate/__init__.py +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -6,7 +6,12 @@ def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "jsonl" caps.supported_loader_file_formats = ["jsonl"] + # weaviate names are case sensitive following GraphQL naming convention + # https://weaviate.io/developers/weaviate/config-refs/schema caps.has_case_sensitive_identifiers = False + # weaviate will upper case first letter of class name and lower case first letter of a property + # we assume that naming convention will do that + caps.casefold_identifier = str caps.max_identifier_length = 200 caps.max_column_identifier_length = 1024 caps.max_query_length = 8 * 1024 * 1024 diff --git a/dlt/destinations/impl/weaviate/ci_naming.py b/dlt/destinations/impl/weaviate/ci_naming.py index cc8936f42d..ab4864f9b0 100644 --- a/dlt/destinations/impl/weaviate/ci_naming.py +++ b/dlt/destinations/impl/weaviate/ci_naming.py @@ -2,6 +2,9 @@ class NamingConvention(WeaviateNamingConvention): + def __init__(self, max_length: int = None, is_case_sensitive: bool = False) -> None: + super().__init__(max_length, is_case_sensitive) + def _lowercase_property(self, identifier: str) -> str: """Lowercase the whole property to become case insensitive""" return identifier.lower() diff --git a/dlt/destinations/impl/weaviate/exceptions.py b/dlt/destinations/impl/weaviate/exceptions.py index ee798e4e76..11e440a811 100644 --- a/dlt/destinations/impl/weaviate/exceptions.py +++ b/dlt/destinations/impl/weaviate/exceptions.py @@ -1,16 +1,16 @@ from dlt.common.destination.exceptions import DestinationException, DestinationTerminalException -class WeaviateBatchError(DestinationException): +class WeaviateGrpcError(DestinationException): pass class PropertyNameConflict(DestinationTerminalException): - def __init__(self) -> None: + def __init__(self, error: str) -> None: super().__init__( "Your data contains items with identical property names when compared case insensitive." " Weaviate cannot handle such data. Please clean up your data before loading or change" " to case insensitive naming convention. See" " https://dlthub.com/docs/dlt-ecosystem/destinations/weaviate#names-normalization for" - " details." + f" details. 
[{error}]" ) diff --git a/dlt/destinations/impl/weaviate/naming.py b/dlt/destinations/impl/weaviate/naming.py index f5c94c872f..837553d29b 100644 --- a/dlt/destinations/impl/weaviate/naming.py +++ b/dlt/destinations/impl/weaviate/naming.py @@ -7,6 +7,9 @@ class NamingConvention(SnakeCaseNamingConvention): """Normalizes identifiers according to Weaviate documentation: https://weaviate.io/developers/weaviate/config-refs/schema#class""" + def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: + super().__init__(max_length, is_case_sensitive) + RESERVED_PROPERTIES = {"id": "__id", "_id": "___id", "_additional": "__additional"} _RE_UNDERSCORES = re.compile("([^_])__+") _STARTS_DIGIT = re.compile("^[0-9]") diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 6f4cfad805..64c9de9607 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -33,8 +33,10 @@ from dlt.common.schema.typing import TColumnSchema, TColumnType from dlt.common.schema.utils import ( get_columns_names_with_prop, + loads_table, normalize_table_identifiers, pipeline_state_table, + version_table, ) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync @@ -42,12 +44,10 @@ from dlt.common.storages import FileStorage from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT - from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo -from dlt.destinations.impl.weaviate import capabilities from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration -from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict, WeaviateBatchError +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict, WeaviateGrpcError from dlt.destinations.type_mapping import TypeMapper @@ -108,7 +108,7 @@ def _wrap(self: JobClientBase, *args: Any, **kwargs: Any) -> Any: if "conflict for property" in str(status_ex) or "none vectorizer module" in str( status_ex ): - raise PropertyNameConflict() + raise PropertyNameConflict(str(status_ex)) raise DestinationTerminalException(status_ex) # looks like there are no more terminal exception raise DestinationTransientException(status_ex) @@ -119,23 +119,25 @@ def _wrap(self: JobClientBase, *args: Any, **kwargs: Any) -> Any: return _wrap # type: ignore -def wrap_batch_error(f: TFun) -> TFun: +def wrap_grpc_error(f: TFun) -> TFun: @wraps(f) def _wrap(*args: Any, **kwargs: Any) -> Any: try: return f(*args, **kwargs) # those look like terminal exceptions - except WeaviateBatchError as batch_ex: + except WeaviateGrpcError as batch_ex: errors = batch_ex.args[0] message = errors["error"][0]["message"] # TODO: actually put the job in failed/retry state and prepare exception message with full info on failing item if "invalid" in message and "property" in message and "on class" in message: raise DestinationTerminalException( - f"Batch failed {errors} AND WILL **NOT** BE RETRIED" + f"Grpc (batch, query) failed {errors} AND WILL **NOT** BE RETRIED" ) if "conflict for property" in message: - raise PropertyNameConflict() - raise DestinationTransientException(f"Batch failed {errors} AND WILL BE RETRIED") + raise PropertyNameConflict(message) + raise DestinationTransientException( + f"Grpc (batch, query) failed 
{errors} AND WILL BE RETRIED" + ) except Exception: raise DestinationTransientException("Batch failed AND WILL BE RETRIED") @@ -178,14 +180,14 @@ def load_batch(self, f: IO[str]) -> None: Weaviate batch supports retries so we do not need to do that. """ - @wrap_batch_error + @wrap_grpc_error def check_batch_result(results: List[StrAny]) -> None: """This kills batch on first error reported""" if results is not None: for result in results: if "result" in result and "errors" in result["result"]: if "error" in result["result"]["errors"]: - raise WeaviateBatchError(result["result"]["errors"]) + raise WeaviateGrpcError(result["result"]["errors"]) with self.db_client.batch( batch_size=self.client_config.batch_size, @@ -237,23 +239,21 @@ def exception(self) -> str: class WeaviateClient(JobClientBase, WithStateSync): """Weaviate client implementation.""" - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() + def __init__( + self, + schema: Schema, + config: WeaviateClientConfiguration, + capabilities: DestinationCapabilitiesContext, + ) -> None: + super().__init__(schema, config, capabilities) + # get definitions of the dlt tables, normalize column names and keep for later use + version_table_ = normalize_table_identifiers(version_table(), schema.naming) + self.version_collection_properties = list(version_table_["columns"].keys()) + loads_table_ = normalize_table_identifiers(loads_table(), schema.naming) + self.loads_collection_properties = list(loads_table_["columns"].keys()) + state_table_ = normalize_table_identifiers(pipeline_state_table(), schema.naming) + self.pipeline_state_properties = list(state_table_["columns"].keys()) - def __init__(self, schema: Schema, config: WeaviateClientConfiguration) -> None: - super().__init__(schema, config) - self.version_collection_properties = list( - schema.get_table_columns(schema.version_table_name).keys() - ) - self.loads_collection_properties = list( - schema.get_table_columns(schema.loads_table_name).keys() - ) - # get definition of state table (may not be present in the schema) - state_table = schema.tables.get( - schema.state_table_name, - normalize_table_identifiers(pipeline_state_table(), schema.naming), - ) - # column names are pipeline properties - self.pipeline_state_properties = list(state_table["columns"].keys()) self.config: WeaviateClientConfiguration = config self.db_client = self.create_db_client(config) @@ -460,13 +460,14 @@ def update_stored_schema( return applied_update def _execute_schema_update(self, only_tables: Iterable[str]) -> None: - for table_name in only_tables or self.schema.tables: + for table_name in only_tables or self.schema.tables.keys(): exists, existing_columns = self.get_storage_table(table_name) # TODO: detect columns where vectorization was added or removed and modify it. 
currently we ignore change of hints new_columns = self.schema.get_new_table_columns( table_name, existing_columns, - case_sensitive=self.capabilities.has_case_sensitive_identifiers, + case_sensitive=self.capabilities.has_case_sensitive_identifiers + and self.capabilities.casefold_identifier is str, ) logger.info(f"Found {len(new_columns)} updates for {table_name} in {self.schema.name}") if len(new_columns) > 0: @@ -502,6 +503,7 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: """Loads compressed state from destination storage""" # normalize properties p_load_id = self.schema.naming.normalize_identifier("load_id") + p_dlt_load_id = self.schema.naming.normalize_identifier("_dlt_load_id") p_pipeline_name = self.schema.naming.normalize_identifier("pipeline_name") p_status = self.schema.naming.normalize_identifier("status") @@ -513,7 +515,7 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: state_records = self.get_records( self.schema.state_table_name, # search by package load id which is guaranteed to increase over time - sort={"path": [p_load_id], "order": "desc"}, + sort={"path": [p_dlt_load_id], "order": "desc"}, where={ "path": [p_pipeline_name], "operator": "Equal", @@ -540,7 +542,8 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ) # if there is a load for this state which was successful, return the state if len(load_records): - state["dlt_load_id"] = state.pop("_dlt_load_id") + state["dlt_load_id"] = state.pop(p_dlt_load_id) + state.pop("version_hash") return StateInfo(**state) def get_stored_schema(self) -> Optional[StorageSchemaInfo]: @@ -579,6 +582,7 @@ def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaI return None @wrap_weaviate_error + # @wrap_grpc_error def get_records( self, table_name: str, @@ -605,8 +609,13 @@ def get_records( query = query.with_offset(offset) response = query.do() + # if json rpc is used, weaviate does not raise exceptions + if "errors" in response: + raise WeaviateGrpcError(response["errors"]) full_class_name = self.make_qualified_class_name(table_name) records = response["data"]["Get"][full_class_name] + if records is None: + raise DestinationTransientException(f"Could not obtain records for {full_class_name}") return cast(List[Dict[str, Any]], records) def make_weaviate_class_schema(self, table_name: str) -> Dict[str, Any]: @@ -675,7 +684,14 @@ def restore_file_load(self, file_path: str) -> LoadJob: @wrap_weaviate_error def complete_load(self, load_id: str) -> None: - values = [load_id, self.schema.name, 0, pendulum.now().isoformat()] + # corresponds to order of the columns in loads_table() + values = [ + load_id, + self.schema.name, + 0, + pendulum.now().isoformat(), + self.schema.version_hash, + ] assert len(values) == len(self.loads_collection_properties) properties = {k: v for k, v in zip(self.loads_collection_properties, values)} self.create_object(properties, self.schema.loads_table_name) @@ -693,12 +709,13 @@ def __exit__( def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) + # corresponds to order of the columns in version_table() values = [ - schema.stored_version_hash, - schema.name, schema.version, schema.ENGINE_VERSION, str(pendulum.now().isoformat()), + schema.name, + schema.stored_version_hash, schema_str, ] assert len(values) == len(self.version_collection_properties) diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index cc879c8397..c87cc2969f 100644 
--- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -26,11 +26,15 @@ TTableSchema, TTableFormat, ) -from dlt.common.schema.utils import normalize_table_identifiers, pipeline_state_table +from dlt.common.schema.utils import ( + loads_table, + normalize_table_identifiers, + pipeline_state_table, + version_table, +) from dlt.common.storages import FileStorage from dlt.common.storages.load_package import LoadJobInfo from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns, TSchemaTables -from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME from dlt.common.destination.reference import ( StateInfo, StorageSchemaInfo, @@ -132,23 +136,20 @@ def __init__( config: DestinationClientConfiguration, sql_client: SqlClientBase[TNativeConn], ) -> None: + # get definitions of the dlt tables, normalize column names and keep for later use + version_table_ = normalize_table_identifiers(version_table(), schema.naming) self.version_table_schema_columns = ", ".join( - sql_client.escape_column_name(col) - for col in schema.get_table_columns(schema.version_table_name) + sql_client.escape_column_name(col) for col in version_table_["columns"] ) + loads_table_ = normalize_table_identifiers(loads_table(), schema.naming) self.loads_table_schema_columns = ", ".join( - sql_client.escape_column_name(col) - for col in schema.get_table_columns(schema.loads_table_name) - ) - # get definition of state table (may not be present in the schema) - state_table = schema.tables.get( - schema.state_table_name, - normalize_table_identifiers(pipeline_state_table(), schema.naming), + sql_client.escape_column_name(col) for col in loads_table_["columns"] ) + state_table_ = normalize_table_identifiers(pipeline_state_table(), schema.naming) self.state_table_columns = ", ".join( - sql_client.escape_column_name(col) for col in state_table["columns"] + sql_client.escape_column_name(col) for col in state_table_["columns"] ) - super().__init__(schema, config) + super().__init__(schema, config, sql_client.capabilities) self.sql_client = sql_client assert isinstance(config, DestinationClientDwhConfiguration) self.config: DestinationClientDwhConfiguration = config @@ -295,7 +296,7 @@ def __exit__( def get_storage_tables( self, table_names: Iterable[str] ) -> Iterable[Tuple[str, TTableSchemaColumns]]: - """Uses INFORMATION SCHEMA to retrieve table and column information for tables in `table_names` iterator. + """Uses INFORMATION_SCHEMA to retrieve table and column information for tables in `table_names` iterator. Table names should be normalized according to naming convention and will be further converted to desired casing in order to (in most cases) create case-insensitive name suitable for search in information schema. @@ -307,38 +308,37 @@ def get_storage_tables( if len(table_names) == 0: # empty generator return + # get schema search components + catalog_name, schema_name, folded_table_names = ( + self.sql_client._get_information_schema_components(*table_names) + ) # create table name conversion lookup table name_lookup = { - self.capabilities.casefold_identifier(table_name): table_name - for table_name in table_names + folded_name: name for folded_name, name in zip(folded_table_names, table_names) } # this should never happen: we verify schema for name clashes before loading assert len(name_lookup) == len(table_names), ( f"One or more of tables in {table_names} after applying" f" {self.capabilities.casefold_identifier} produced a clashing name." 
) - # get components from full table name - db_params = self.sql_client.fully_qualified_dataset_name(escape=False).split(".", 2) - has_catalog = len(db_params) == 2 - # use cased identifier ie. always lower on redshift and upper (by default) on snowflake - db_params = db_params + list(name_lookup.keys()) - query = f""" SELECT {",".join(self._get_storage_table_query_columns())} FROM INFORMATION_SCHEMA.COLUMNS WHERE """ - if has_catalog: + + db_params = [] + if catalog_name: + db_params.append(catalog_name) query += "table_catalog = %s AND " + db_params.append(schema_name) + db_params = db_params + list(name_lookup.keys()) # placeholder for each table table_placeholders = ",".join(["%s"] * len(table_names)) query += ( f"table_schema = %s AND table_name IN ({table_placeholders}) ORDER BY table_name," " ordinal_position;" ) - print(query) - print(db_params) rows = self.sql_client.execute_sql(query, *db_params) - print(rows) prev_table: str = None storage_columns: TTableSchemaColumns = None for c in rows: @@ -357,7 +357,6 @@ def get_storage_tables( # remove from table_names table_names.remove(prev_table) # add columns - # TODO: in many cases this will not work col_name = c[1] numeric_precision = ( c[4] if self.capabilities.schema_supports_numeric_precision else None @@ -385,15 +384,6 @@ def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns] storage_table = list(self.get_storage_tables([table_name]))[0] return len(storage_table[1]) > 0, storage_table[1] - def _get_storage_table_query_columns(self) -> List[str]: - """Column names used when querying table from information schema. - Override for databases that use different namings. - """ - fields = ["table_name", "column_name", "data_type", "is_nullable"] - if self.capabilities.schema_supports_numeric_precision: - fields += ["numeric_precision", "numeric_scale"] - return fields - @abstractmethod def _from_db_type( self, db_type: str, precision: Optional[int], scale: Optional[int] @@ -403,8 +393,6 @@ def _from_db_type( def get_stored_schema(self) -> StorageSchemaInfo: name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) c_schema_name, c_inserted_at = self._norm_and_escape_columns("schema_name", "inserted_at") - # c_schema_name = self.schema.naming.normalize_identifier("schema_name") - # c_inserted_at = self.schema.naming.normalize_identifier("inserted_at") query = ( f"SELECT {self.version_table_schema_columns} FROM {name} WHERE {c_schema_name} = %s" f" ORDER BY {c_inserted_at} DESC;" @@ -442,6 +430,15 @@ def get_stored_schema_by_hash(self, version_hash: str) -> StorageSchemaInfo: ) return self._row_to_schema_info(query, version_hash) + def _get_storage_table_query_columns(self) -> List[str]: + """Column names used when querying table from information schema. + Override for databases that use different namings. + """ + fields = ["table_name", "column_name", "data_type", "is_nullable"] + if self.capabilities.schema_supports_numeric_precision: + fields += ["numeric_precision", "numeric_scale"] + return fields + def _execute_schema_update_sql(self, only_tables: Iterable[str]) -> TSchemaTables: sql_scripts, schema_update = self._build_schema_update_sql(only_tables) # Stay within max query size when doing DDL. 
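[Editor's aside, not part of the patch] The expression `has_case_sensitive_identifiers and casefold_identifier is str` recurs in this series (Weaviate client above, `_create_table_update` below) and decides whether schema columns are compared to storage columns case sensitively. A minimal sketch of that rule follows; the helper name is illustrative only (it is not part of dlt's API), and the sample capability values mirror the ones set elsewhere in this patch (Snowflake folds unquoted identifiers to upper case, MSSQL/Synapse default to case insensitive):

    def column_compare_is_case_sensitive(has_case_sensitive_identifiers: bool, casefold_identifier) -> bool:
        # comparison is case sensitive only when the destination both preserves case
        # (str is the identity casefold) and treats identifiers as case sensitive
        return has_case_sensitive_identifiers and casefold_identifier is str

    # values as configured in this patch series (illustrative):
    assert column_compare_is_case_sensitive(True, str) is True          # e.g. a case sensitive naming convention on Postgres
    assert column_compare_is_case_sensitive(True, str.upper) is False   # Snowflake: case sensitive, but identifiers fold to upper case
    assert column_compare_is_case_sensitive(False, str) is False        # MSSQL/Synapse defaults (collation dependent)
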
@@ -471,6 +468,7 @@ def _build_schema_update_sql( for table_name, storage_columns in self.get_storage_tables( only_tables or self.schema.tables.keys() ): + # this will skip incomplete columns new_columns = self._create_table_update(table_name, storage_columns) if len(new_columns) > 0: # build and add sql to execute @@ -560,7 +558,8 @@ def _create_table_update( updates = self.schema.get_new_table_columns( table_name, storage_columns, - case_sensitive=self.capabilities.has_case_sensitive_identifiers, + case_sensitive=self.capabilities.has_case_sensitive_identifiers + and self.capabilities.casefold_identifier is str, ) logger.info(f"Found {len(updates)} updates for {table_name} in {self.schema.name}") return updates diff --git a/dlt/destinations/utils.py b/dlt/destinations/utils.py index 8c32c37fd4..e93feb58de 100644 --- a/dlt/destinations/utils.py +++ b/dlt/destinations/utils.py @@ -28,9 +28,9 @@ def ensure_resource(data: Any) -> DltResource: def info_schema_null_to_bool(v: str) -> bool: """Converts INFORMATION SCHEMA truth values to Python bool""" - if v == "NO": + if v in ("NO", "0"): return False - elif v == "YES": + elif v in ("YES", "1"): return True raise ValueError(v) From b1e2b0980a23cc6d1b161bbf1819bdf0c2bbdb28 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 22:05:47 +0200 Subject: [PATCH 044/105] adds naming convention to restore state tests, make them essential --- tests/load/pipeline/test_restore_state.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 1ea9ed880a..6b04285c94 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -185,6 +185,7 @@ def test_silently_skip_on_invalid_credentials( destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) +@pytest.mark.essential @pytest.mark.parametrize( "destination_config", destinations_configs( @@ -193,7 +194,9 @@ def test_silently_skip_on_invalid_credentials( ids=lambda x: x.name, ) @pytest.mark.parametrize("use_single_dataset", [True, False]) -@pytest.mark.parametrize("naming_convention", ["sql_upper", "snake_case"]) +@pytest.mark.parametrize( + "naming_convention", ["tests.common.cases.normalizers.title_case", "snake_case"] +) def test_get_schemas_from_destination( destination_config: DestinationTestConfiguration, use_single_dataset: bool, @@ -276,6 +279,7 @@ def _make_dn_name(schema_name: str) -> str: assert len(restored_schemas) == 3 +@pytest.mark.essential @pytest.mark.parametrize( "destination_config", destinations_configs( @@ -283,7 +287,9 @@ def _make_dn_name(schema_name: str) -> str: ), ids=lambda x: x.name, ) -@pytest.mark.parametrize("naming_convention", ["sql_upper"]) +@pytest.mark.parametrize( + "naming_convention", ["tests.common.cases.normalizers.title_case", "snake_case", "sql_upper"] +) def test_restore_state_pipeline( destination_config: DestinationTestConfiguration, naming_convention: str ) -> None: From 210be7068109cdc045560f3af2eb3cf44e205fe8 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 22:05:57 +0200 Subject: [PATCH 045/105] fixes table builder tests --- dlt/common/typing.py | 10 ++--- tests/.dlt/config.toml | 4 +- tests/common/cases/normalizers/__init__.py | 0 tests/common/cases/normalizers/title_case.py | 14 +++++++ .../bigquery/test_bigquery_table_builder.py | 17 +++++--- .../test_clickhouse_table_builder.py | 3 ++ tests/load/dremio/test_dremio_client.py | 3 ++ 
.../load/duckdb/test_duckdb_table_builder.py | 3 ++ ...entials.py => test_mssql_configuration.py} | 33 ++++++++++++++- tests/load/mssql/test_mssql_table_builder.py | 12 +++--- .../postgres/test_postgres_table_builder.py | 42 +++++++++++++++++-- tests/load/redshift/test_redshift_client.py | 35 +++++++++++++++- .../redshift/test_redshift_table_builder.py | 2 + .../snowflake/test_snowflake_table_builder.py | 3 +- .../synapse/test_synapse_configuration.py | 40 +++++++++++++++++- .../synapse/test_synapse_table_builder.py | 13 +++--- tests/load/test_sql_client.py | 2 +- tests/load/weaviate/test_pipeline.py | 6 ++- tests/load/weaviate/test_weaviate_client.py | 14 +++++-- tests/pipeline/test_dlt_versions.py | 13 ++++-- 20 files changed, 230 insertions(+), 39 deletions(-) create mode 100644 tests/common/cases/normalizers/__init__.py create mode 100644 tests/common/cases/normalizers/title_case.py rename tests/load/mssql/{test_mssql_credentials.py => test_mssql_configuration.py} (77%) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 15fe3f0649..cd535cdcd2 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -377,12 +377,12 @@ def get_generic_type_argument_from_instance( Type[Any]: type argument or Any if not known """ orig_param_type = Any - # instance of class deriving from generic - if bases_ := get_original_bases(instance): - cls_ = bases_[0] - else: + if cls_ := getattr(instance, "__orig_class__", None): # instance of generic class - cls_ = getattr(instance, "__orig_class__", None) + pass + elif bases_ := get_original_bases(instance): + # instance of class deriving from generic + cls_ = bases_[0] if cls_: orig_param_type = get_args(cls_)[0] if orig_param_type is Any and sample_value is not None: diff --git a/tests/.dlt/config.toml b/tests/.dlt/config.toml index ba86edf417..ec0df4fc15 100644 --- a/tests/.dlt/config.toml +++ b/tests/.dlt/config.toml @@ -1,5 +1,5 @@ -[runtime] -sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" +# [runtime] +# sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" [tests] bucket_url_gs="gs://ci-test-bucket" diff --git a/tests/common/cases/normalizers/__init__.py b/tests/common/cases/normalizers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/common/cases/normalizers/title_case.py b/tests/common/cases/normalizers/title_case.py new file mode 100644 index 0000000000..41eb96fcda --- /dev/null +++ b/tests/common/cases/normalizers/title_case.py @@ -0,0 +1,14 @@ +from dlt.common.normalizers.naming.direct import NamingConvention as DirectNamingConvention + + +class NamingConvention(DirectNamingConvention): + """Test case sensitive naming that capitalizes first and last letter and leaves the rest intact""" + + PATH_SEPARATOR = "__" + + def normalize_identifier(self, identifier: str) -> str: + # keep prefix + if identifier == "_dlt": + return "_dlt" + identifier = super().normalize_identifier(identifier) + return identifier[0].upper() + identifier[1:-1] + identifier[-1].upper() diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index df564192dc..a38b1746f4 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -21,11 +21,15 @@ from dlt.common.schema import Schema from dlt.common.utils import custom_environ from dlt.common.utils import uniq_id + from dlt.destinations.exceptions import 
DestinationSchemaWillNotUpdate +from dlt.destinations.impl.bigquery import capabilities from dlt.destinations.impl.bigquery.bigquery import BigQueryClient from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration + from dlt.extract import DltResource + from tests.load.pipeline.utils import ( destinations_configs, DestinationTestConfiguration, @@ -63,6 +67,7 @@ def gcp_client(empty_schema: Schema) -> BigQueryClient: BigQueryClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name=f"test_{uniq_id()}" ), + capabilities(), ) @@ -89,9 +94,9 @@ def test_create_table(gcp_client: BigQueryClient) -> None: sqlfluff.parse(sql, dialect="bigquery") assert sql.startswith("CREATE TABLE") assert "event_test_table" in sql - assert "`col1` INTEGER NOT NULL" in sql + assert "`col1` INT64 NOT NULL" in sql assert "`col2` FLOAT64 NOT NULL" in sql - assert "`col3` BOOLEAN NOT NULL" in sql + assert "`col3` BOOL NOT NULL" in sql assert "`col4` TIMESTAMP NOT NULL" in sql assert "`col5` STRING " in sql assert "`col6` NUMERIC(38,9) NOT NULL" in sql @@ -100,7 +105,7 @@ def test_create_table(gcp_client: BigQueryClient) -> None: assert "`col9` JSON NOT NULL" in sql assert "`col10` DATE" in sql assert "`col11` TIME" in sql - assert "`col1_precision` INTEGER NOT NULL" in sql + assert "`col1_precision` INT64 NOT NULL" in sql assert "`col4_precision` TIMESTAMP NOT NULL" in sql assert "`col5_precision` STRING(25) " in sql assert "`col6_precision` NUMERIC(6,2) NOT NULL" in sql @@ -119,9 +124,9 @@ def test_alter_table(gcp_client: BigQueryClient) -> None: assert sql.startswith("ALTER TABLE") assert sql.count("ALTER TABLE") == 1 assert "event_test_table" in sql - assert "ADD COLUMN `col1` INTEGER NOT NULL" in sql + assert "ADD COLUMN `col1` INT64 NOT NULL" in sql assert "ADD COLUMN `col2` FLOAT64 NOT NULL" in sql - assert "ADD COLUMN `col3` BOOLEAN NOT NULL" in sql + assert "ADD COLUMN `col3` BOOL NOT NULL" in sql assert "ADD COLUMN `col4` TIMESTAMP NOT NULL" in sql assert "ADD COLUMN `col5` STRING" in sql assert "ADD COLUMN `col6` NUMERIC(38,9) NOT NULL" in sql @@ -130,7 +135,7 @@ def test_alter_table(gcp_client: BigQueryClient) -> None: assert "ADD COLUMN `col9` JSON NOT NULL" in sql assert "ADD COLUMN `col10` DATE" in sql assert "ADD COLUMN `col11` TIME" in sql - assert "ADD COLUMN `col1_precision` INTEGER NOT NULL" in sql + assert "ADD COLUMN `col1_precision` INT64 NOT NULL" in sql assert "ADD COLUMN `col4_precision` TIMESTAMP NOT NULL" in sql assert "ADD COLUMN `col5_precision` STRING(25)" in sql assert "ADD COLUMN `col6_precision` NUMERIC(6,2) NOT NULL" in sql diff --git a/tests/load/clickhouse/test_clickhouse_table_builder.py b/tests/load/clickhouse/test_clickhouse_table_builder.py index fd3bf50907..653ca33c38 100644 --- a/tests/load/clickhouse/test_clickhouse_table_builder.py +++ b/tests/load/clickhouse/test_clickhouse_table_builder.py @@ -6,6 +6,8 @@ from dlt.common.schema import Schema from dlt.common.utils import custom_environ, digest128 from dlt.common.utils import uniq_id + +from dlt.destinations.impl.clickhouse import capabilities from dlt.destinations.impl.clickhouse.clickhouse import ClickHouseClient from dlt.destinations.impl.clickhouse.configuration import ( ClickHouseCredentials, @@ -21,6 +23,7 @@ def clickhouse_client(empty_schema: Schema) -> ClickHouseClient: return ClickHouseClient( empty_schema, ClickHouseClientConfiguration(credentials=creds)._bind_dataset_name(f"test_{uniq_id()}"), + 
capabilities(), ) diff --git a/tests/load/dremio/test_dremio_client.py b/tests/load/dremio/test_dremio_client.py index d0002dc343..a690472f97 100644 --- a/tests/load/dremio/test_dremio_client.py +++ b/tests/load/dremio/test_dremio_client.py @@ -1,6 +1,8 @@ import pytest from dlt.common.schema import TColumnSchema, Schema + +from dlt.destinations.impl.dremio import capabilities from dlt.destinations.impl.dremio.configuration import DremioClientConfiguration, DremioCredentials from dlt.destinations.impl.dremio.dremio import DremioClient from tests.load.utils import empty_schema @@ -15,6 +17,7 @@ def dremio_client(empty_schema: Schema) -> DremioClient: DremioClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_dataset" ), + capabilities(), ) diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py index 545f182ece..b492fa1747 100644 --- a/tests/load/duckdb/test_duckdb_table_builder.py +++ b/tests/load/duckdb/test_duckdb_table_builder.py @@ -5,6 +5,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema +from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.duck import DuckDbClient from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration @@ -25,6 +26,7 @@ def client(empty_schema: Schema) -> DuckDbClient: return DuckDbClient( empty_schema, DuckDbClientConfiguration()._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) @@ -122,6 +124,7 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: DuckDbClientConfiguration(create_indexes=True)._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql) diff --git a/tests/load/mssql/test_mssql_credentials.py b/tests/load/mssql/test_mssql_configuration.py similarity index 77% rename from tests/load/mssql/test_mssql_credentials.py rename to tests/load/mssql/test_mssql_configuration.py index 7d49196531..75af101e23 100644 --- a/tests/load/mssql/test_mssql_credentials.py +++ b/tests/load/mssql/test_mssql_configuration.py @@ -1,15 +1,46 @@ +import os import pyodbc import pytest from dlt.common.configuration import resolve_configuration, ConfigFieldMissingException from dlt.common.exceptions import SystemConfigurationException +from dlt.common.schema import Schema -from dlt.destinations.impl.mssql.configuration import MsSqlCredentials +from dlt.destinations import mssql +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration # mark all tests as essential, do not remove pytestmark = pytest.mark.essential +def test_mssql_factory() -> None: + schema = Schema("schema") + dest = mssql() + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is False + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers is False + assert client.capabilities.casefold_identifier is str + + # set args explicitly + dest = mssql(has_case_sensitive_identifiers=True, create_indexes=True) + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set 
args via config + os.environ["DESTINATION__CREATE_INDEXES"] = "True" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = mssql() + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + def test_mssql_credentials_defaults() -> None: creds = MsSqlCredentials() assert creds.port == 1433 diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py index f7a87c14ee..79126572fb 100644 --- a/tests/load/mssql/test_mssql_table_builder.py +++ b/tests/load/mssql/test_mssql_table_builder.py @@ -6,7 +6,8 @@ pytest.importorskip("dlt.destinations.impl.mssql.mssql", reason="MSSQL ODBC driver not installed") -from dlt.destinations.impl.mssql.mssql import MsSqlClient +from dlt.destinations.impl.mssql import capabilities +from dlt.destinations.impl.mssql.mssql import MsSqlJobClient from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials from tests.load.utils import TABLE_UPDATE, empty_schema @@ -16,17 +17,18 @@ @pytest.fixture -def client(empty_schema: Schema) -> MsSqlClient: +def client(empty_schema: Schema) -> MsSqlJobClient: # return client without opening connection - return MsSqlClient( + return MsSqlJobClient( empty_schema, MsSqlClientConfiguration(credentials=MsSqlCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) -def test_create_table(client: MsSqlClient) -> None: +def test_create_table(client: MsSqlJobClient) -> None: # non existing table sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)[0] sqlfluff.parse(sql, dialect="tsql") @@ -50,7 +52,7 @@ def test_create_table(client: MsSqlClient) -> None: assert '"col11_precision" time(3) NOT NULL' in sql -def test_alter_table(client: MsSqlClient) -> None: +def test_alter_table(client: MsSqlJobClient) -> None: # existing table has no columns sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)[0] sqlfluff.parse(sql, dialect="tsql") diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index 7566b8afce..af8f96a907 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -4,8 +4,10 @@ from dlt.common.exceptions import TerminalValueError from dlt.common.utils import uniq_id -from dlt.common.schema import Schema +from dlt.common.schema import Schema, utils +from dlt.common.destination import Destination +from dlt.destinations.impl.postgres import capabilities from dlt.destinations.impl.postgres.postgres import PostgresClient from dlt.destinations.impl.postgres.configuration import ( PostgresClientConfiguration, @@ -25,12 +27,26 @@ @pytest.fixture def client(empty_schema: Schema) -> PostgresClient: + return create_client(empty_schema) + + +@pytest.fixture +def cs_client(empty_schema: Schema) -> PostgresClient: + # change normalizer to case sensitive + empty_schema._normalizers_config["names"] = "tests.common.cases.normalizers.title_case" + empty_schema.update_normalizers() + return create_client(empty_schema) + + +def create_client(empty_schema: Schema) -> PostgresClient: # return client without opening connection + config = 
PostgresClientConfiguration(credentials=PostgresCredentials())._bind_dataset_name( + dataset_name="test_" + uniq_id() + ) return PostgresClient( empty_schema, - PostgresClientConfiguration(credentials=PostgresCredentials())._bind_dataset_name( - dataset_name="test_" + uniq_id() - ), + config, + Destination.adjust_capabilities(capabilities(), config, empty_schema.naming), ) @@ -125,7 +141,25 @@ def test_create_table_with_hints(client: PostgresClient) -> None: create_indexes=False, credentials=PostgresCredentials(), )._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql, dialect="postgres") assert '"col2" double precision NOT NULL' in sql + + +def test_create_table_case_sensitive(cs_client: PostgresClient) -> None: + cs_client.schema.update_table( + utils.new_table("event_test_table", columns=deepcopy(TABLE_UPDATE)) + ) + sql = cs_client._get_table_update_sql( + "Event_test_tablE", + list(cs_client.schema.get_table_columns("Event_test_tablE").values()), + False, + )[0] + sqlfluff.parse(sql, dialect="postgres") + # everything capitalized + assert cs_client.sql_client.fully_qualified_dataset_name(escape=False)[0] == "T" # Test + # every line starts with "Col" + for line in sql.split("\n")[1:]: + assert line.startswith('"Col') diff --git a/tests/load/redshift/test_redshift_client.py b/tests/load/redshift/test_redshift_client.py index 03bb57c3b4..bb923df673 100644 --- a/tests/load/redshift/test_redshift_client.py +++ b/tests/load/redshift/test_redshift_client.py @@ -6,13 +6,18 @@ from dlt.common import json, pendulum from dlt.common.configuration.resolve import resolve_configuration +from dlt.common.schema.schema import Schema from dlt.common.schema.typing import VERSION_TABLE_NAME from dlt.common.storages import FileStorage from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.utils import uniq_id from dlt.destinations.exceptions import DatabaseTerminalException -from dlt.destinations.impl.redshift.configuration import RedshiftCredentials +from dlt.destinations import redshift +from dlt.destinations.impl.redshift.configuration import ( + RedshiftCredentials, + RedshiftClientConfiguration, +) from dlt.destinations.impl.redshift.redshift import RedshiftClient, psycopg2 from tests.common.utils import COMMON_TEST_CASES_PATH @@ -42,6 +47,34 @@ def test_postgres_and_redshift_credentials_defaults() -> None: assert red_cred.port == 5439 +def test_redshift_factory() -> None: + schema = Schema("schema") + dest = redshift() + client = dest.client(schema, RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role is None + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers is False + assert client.capabilities.casefold_identifier is str.lower + + # set args explicitly + dest = redshift(has_case_sensitive_identifiers=True, staging_iam_role="LOADER") + client = dest.client(schema, RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role == "LOADER" + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set args via config + os.environ["DESTINATION__STAGING_IAM_ROLE"] = "LOADER" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = redshift() + client = 
dest.client(schema, RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role == "LOADER" + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + @skipifpypy def test_text_too_long(client: RedshiftClient, file_storage: FileStorage) -> None: caps = client.capabilities diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py index 2427bc7cfe..b7557f8bc9 100644 --- a/tests/load/redshift/test_redshift_table_builder.py +++ b/tests/load/redshift/test_redshift_table_builder.py @@ -6,6 +6,7 @@ from dlt.common.schema import Schema from dlt.common.configuration import resolve_configuration +from dlt.destinations.impl.redshift import capabilities from dlt.destinations.impl.redshift.redshift import RedshiftClient from dlt.destinations.impl.redshift.configuration import ( RedshiftClientConfiguration, @@ -26,6 +27,7 @@ def client(empty_schema: Schema) -> RedshiftClient: RedshiftClientConfiguration(credentials=RedshiftCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py index bdbe888fb5..194b6bb6fb 100644 --- a/tests/load/snowflake/test_snowflake_table_builder.py +++ b/tests/load/snowflake/test_snowflake_table_builder.py @@ -5,12 +5,12 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema +from dlt.destinations.impl.snowflake import capabilities from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient from dlt.destinations.impl.snowflake.configuration import ( SnowflakeClientConfiguration, SnowflakeCredentials, ) -from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate from tests.load.utils import TABLE_UPDATE, empty_schema @@ -27,6 +27,7 @@ def snowflake_client(empty_schema: Schema) -> SnowflakeClient: SnowflakeClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) diff --git a/tests/load/synapse/test_synapse_configuration.py b/tests/load/synapse/test_synapse_configuration.py index f366d87d09..8aaea03b0f 100644 --- a/tests/load/synapse/test_synapse_configuration.py +++ b/tests/load/synapse/test_synapse_configuration.py @@ -1,8 +1,11 @@ +import os import pytest from dlt.common.configuration import resolve_configuration from dlt.common.exceptions import SystemConfigurationException +from dlt.common.schema import Schema +from dlt.destinations import synapse from dlt.destinations.impl.synapse.configuration import ( SynapseClientConfiguration, SynapseCredentials, @@ -14,7 +17,42 @@ def test_synapse_configuration() -> None: # By default, unique indexes should not be created. 
- assert SynapseClientConfiguration().create_indexes is False + c = SynapseClientConfiguration() + assert c.create_indexes is False + assert c.has_case_sensitive_identifiers is False + assert c.staging_use_msi is False + + +def test_synapse_factory() -> None: + schema = Schema("schema") + dest = synapse() + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is False + assert client.config.staging_use_msi is False + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers is False + assert client.capabilities.casefold_identifier is str + + # set args explicitly + dest = synapse(has_case_sensitive_identifiers=True, create_indexes=True, staging_use_msi=True) + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.staging_use_msi is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set args via config + os.environ["DESTINATION__CREATE_INDEXES"] = "True" + os.environ["DESTINATION__STAGING_USE_MSI"] = "True" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = synapse() + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.staging_use_msi is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str def test_parse_native_representation() -> None: diff --git a/tests/load/synapse/test_synapse_table_builder.py b/tests/load/synapse/test_synapse_table_builder.py index 9ee2ebe202..6e3e69b3a3 100644 --- a/tests/load/synapse/test_synapse_table_builder.py +++ b/tests/load/synapse/test_synapse_table_builder.py @@ -7,17 +7,18 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema, TColumnHint -from dlt.destinations.impl.synapse.synapse import SynapseClient +from dlt.destinations.impl.synapse import capabilities +from dlt.destinations.impl.synapse.synapse import ( + SynapseClient, + HINT_TO_SYNAPSE_ATTR, + TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR, +) from dlt.destinations.impl.synapse.configuration import ( SynapseClientConfiguration, SynapseCredentials, ) from tests.load.utils import TABLE_UPDATE, empty_schema -from dlt.destinations.impl.synapse.synapse import ( - HINT_TO_SYNAPSE_ATTR, - TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR, -) # mark all tests as essential, do not remove pytestmark = pytest.mark.essential @@ -31,6 +32,7 @@ def client(empty_schema: Schema) -> SynapseClient: SynapseClientConfiguration(credentials=SynapseCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) assert client.config.create_indexes is False return client @@ -44,6 +46,7 @@ def client_with_indexes_enabled(empty_schema: Schema) -> SynapseClient: SynapseClientConfiguration( credentials=SynapseCredentials(), create_indexes=True )._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) assert client.config.create_indexes is True return client diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 26d7884179..7fb7cc26c4 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -490,7 
+490,7 @@ def test_transaction_isolation(client: SqlJobClientBase) -> None: def test_thread(thread_id: Decimal) -> None: # make a copy of the sql_client thread_client = client.sql_client.__class__( - client.sql_client.dataset_name, client.sql_client.credentials + client.sql_client.dataset_name, client.sql_client.credentials, client.capabilities ) with thread_client: with thread_client.begin_transaction(): diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py index ee42ab59d8..507a4c4f8d 100644 --- a/tests/load/weaviate/test_pipeline.py +++ b/tests/load/weaviate/test_pipeline.py @@ -4,6 +4,10 @@ import dlt from dlt.common import json +from dlt.common.schema.exceptions import ( + SchemaCorruptedException, + SchemaIdentifierNormalizationClash, +) from dlt.common.utils import uniq_id from dlt.destinations.impl.weaviate import weaviate_adapter @@ -391,7 +395,7 @@ def test_vectorize_property_without_data() -> None: primary_key="vAlue", columns={"vAlue": {"data_type": "text"}}, ) - assert isinstance(pipe_ex.value.__context__, PropertyNameConflict) + assert isinstance(pipe_ex.value.__context__, SchemaIdentifierNormalizationClash) # set the naming convention to case insensitive os.environ["SCHEMA__NAMING"] = "dlt.destinations.impl.weaviate.ci_naming" diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 8a1f3c51ad..730c2675f3 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -5,6 +5,7 @@ from dlt.common.schema import Schema from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext +from dlt.common.schema.exceptions import SchemaIdentifierNormalizationClash from dlt.common.utils import uniq_id from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns @@ -119,8 +120,13 @@ def test_case_sensitive_properties_create(client: WeaviateClient) -> None: ) ) client.schema._bump_version() - with pytest.raises(PropertyNameConflict): + with pytest.raises(SchemaIdentifierNormalizationClash) as clash_ex: client.update_stored_schema() + assert clash_ex.value.identifier_type == "column" + assert clash_ex.value.identifier_name == "coL1" + assert clash_ex.value.conflict_identifier_name == "col1" + assert clash_ex.value.table_name == "ColClass" + assert clash_ex.value.naming_name == "dlt.destinations.impl.weaviate.naming" def test_case_insensitive_properties_create(ci_client: WeaviateClient) -> None: @@ -163,7 +169,7 @@ def test_case_sensitive_properties_add(client: WeaviateClient) -> None: ) ) client.schema._bump_version() - with pytest.raises(PropertyNameConflict): + with pytest.raises(SchemaIdentifierNormalizationClash): client.update_stored_schema() # _, table_columns = client.get_storage_table("ColClass") @@ -179,12 +185,13 @@ def test_load_case_sensitive_data(client: WeaviateClient, file_storage: FileStor client.schema.update_table(new_table(class_name, columns=[table_create["col1"]])) client.schema._bump_version() client.update_stored_schema() - # prepare a data item where is name clash due to Weaviate being CI + # prepare a data item where is name clash due to Weaviate being CS data_clash = {"col1": 72187328, "coL1": 726171} # write row with io.BytesIO() as f: write_dataset(client, f, [data_clash], table_create) query = f.getvalue().decode() + class_name = client.schema.naming.normalize_table_identifier(class_name) with 
pytest.raises(PropertyNameConflict): expect_load_file(client, file_storage, query, class_name) @@ -210,6 +217,7 @@ def test_load_case_sensitive_data_ci(ci_client: WeaviateClient, file_storage: Fi with io.BytesIO() as f: write_dataset(ci_client, f, [data_clash], table_create) query = f.getvalue().decode() + class_name = ci_client.schema.naming.normalize_table_identifier(class_name) expect_load_file(ci_client, file_storage, query, class_name) response = ci_client.query_class(class_name, ["col1"]).do() objects = response["data"]["Get"][ci_client.make_qualified_class_name(class_name)] diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 2c2ba0292e..26eae36dd6 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -19,6 +19,7 @@ TStoredSchema, ) from dlt.common.configuration.resolve import resolve_configuration +from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient @@ -74,7 +75,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: DuckDbClientConfiguration()._bind_dataset_name(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb"), ) - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql(f"SELECT * FROM {LOADS_TABLE_NAME}") # make sure we have just 4 columns assert len(rows[0]) == 4 @@ -115,7 +118,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: ) assert "_version_hash" in state_dict - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql( f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at" ) @@ -195,7 +200,9 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: DuckDbClientConfiguration()._bind_dataset_name(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb"), ) - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 70 github_schema = json.loads( From 95b703d9a8baaecb2f80752a462fcd6837afd648 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 12 Jun 2024 22:01:17 +0200 Subject: [PATCH 046/105] removes processing hints when exporting schema to import folder, warns on schema import overriding local schema, warns on processing hints present --- dlt/common/normalizers/naming/duck_case.py | 7 ++- dlt/common/normalizers/naming/snake_case.py | 4 +- dlt/common/schema/exceptions.py | 2 +- dlt/common/schema/utils.py | 15 ++++- dlt/common/storages/schema_storage.py | 63 ++++++++++++++------ tests/common/storages/test_schema_storage.py | 28 +++++---- tests/common/utils.py | 16 ++++- tests/extract/test_decorators.py | 2 +- 8 files changed, 98 insertions(+), 39 deletions(-) diff --git a/dlt/common/normalizers/naming/duck_case.py b/dlt/common/normalizers/naming/duck_case.py index 063482a799..295e607f46 100644 --- a/dlt/common/normalizers/naming/duck_case.py +++ b/dlt/common/normalizers/naming/duck_case.py @@ -6,7 +6,12 @@ class NamingConvention(SnakeCaseNamingConvention): _CLEANUP_TABLE = str.maketrans('\n\r"', "___") - 
_RE_LEADING_DIGITS = None # do not remove leading digits + + def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: + """Case sensitive naming convention preserving all unicode characters except new line(s). Uses __ for path + separation and will replace multiple underscores with a single one. + """ + super().__init__(max_length, is_case_sensitive) @staticmethod @lru_cache(maxsize=None) diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py index 782c9ec781..ffa0bf6968 100644 --- a/dlt/common/normalizers/naming/snake_case.py +++ b/dlt/common/normalizers/naming/snake_case.py @@ -1,5 +1,5 @@ import re -from typing import Any, List, Sequence +from typing import Sequence from functools import lru_cache from dlt.common.normalizers.naming.naming import NamingConvention as BaseNamingConvention @@ -19,7 +19,7 @@ class NamingConvention(BaseNamingConvention): PATH_SEPARATOR = "__" def __init__(self, max_length: int = None, is_case_sensitive: bool = False) -> None: - super().__init__(max_length, False) + super().__init__(max_length, is_case_sensitive) def normalize_identifier(self, identifier: str) -> str: identifier = super().normalize_identifier(identifier) diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 283069d030..52b34e5959 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -210,7 +210,7 @@ def __init__( self.from_naming = from_naming msg = ( f"Attempt to normalize identifiers for a table {table_name} from naming" - f" {str(type(from_naming))} to {str(type(to_naming))} changed one or more identifiers. " + f" {from_naming.name()} to {to_naming.name()} changed one or more identifiers. " ) msg += ( " This table already received data and tables were created at the destination. By" diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 7730cd2b58..039392a03e 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -551,10 +551,21 @@ def has_table_seen_data(table: TTableSchema) -> bool: def remove_processing_hints(tables: TSchemaTables) -> TSchemaTables: "Removes processing hints like x-normalizer and x-loader from schema tables. 
Modifies the input tables and returns it for convenience" + for table_name, hints in get_processing_hints(tables).items(): + for hint in hints: + del tables[table_name][hint] # type: ignore[misc] + return tables + + +def get_processing_hints(tables: TSchemaTables) -> Dict[str, List[str]]: + """Finds processing hints in a set of tables and returns table_name: [hints] mapping""" + hints: Dict[str, List[str]] = {} for table in tables.values(): for hint in TTableProcessingHints.__annotations__.keys(): - table.pop(hint, None) # type: ignore[misc] - return tables + if hint in table: + table_hints = hints.setdefault(table["name"], []) + table_hints.append(hint) + return hints def hint_to_column_prop(h: TColumnHint) -> TColumnProp: diff --git a/dlt/common/storages/schema_storage.py b/dlt/common/storages/schema_storage.py index a9643de453..0544de696f 100644 --- a/dlt/common/storages/schema_storage.py +++ b/dlt/common/storages/schema_storage.py @@ -5,7 +5,7 @@ from dlt.common.json import json from dlt.common.configuration import with_config from dlt.common.configuration.accessors import config -from dlt.common.schema.utils import to_pretty_json, to_pretty_yaml +from dlt.common.schema.utils import get_processing_hints, to_pretty_json, to_pretty_yaml from dlt.common.storages.configuration import ( SchemaStorageConfiguration, TSchemaFileFormat, @@ -59,7 +59,7 @@ def load_schema(self, name: str) -> Schema: def save_schema(self, schema: Schema) -> str: """Saves schema to the storage and returns the path relative to storage. - If import schema path is configured and import schema with schena.name exits, it + If import schema path is configured and import schema with schema.name exits, it will be linked to `schema` via `_imported_version_hash`. Such hash is used in `load_schema` to detect if import schema changed and thus to overwrite the storage schema. @@ -74,9 +74,7 @@ def save_schema(self, schema: Schema) -> str: except FileNotFoundError: # just save the schema pass - path = self._save_schema(schema) - if self.config.export_schema_path: - self._export_schema(schema, self.config.export_schema_path) + path = self._save_and_export_schema(schema) return path def save_import_schema_if_not_exists(self, schema: Schema) -> bool: @@ -140,25 +138,32 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> f" {rv_schema._imported_version_hash}" ) # if schema was imported, overwrite storage schema - self._save_schema(rv_schema) - if self.config.export_schema_path: - self._export_schema(rv_schema, self.config.export_schema_path) + self._save_and_export_schema(rv_schema, check_processing_hints=True) else: # import schema when imported schema was modified from the last import rv_schema = Schema.from_dict(storage_schema) i_s = Schema.from_dict(imported_schema) if i_s.version_hash != rv_schema._imported_version_hash: + logger.warning( + f"Schema {name} was present in schema storage at" + f" {self.storage.storage_path} but will be overwritten with imported schema" + f" version {i_s.version} and imported hash {i_s.version_hash}" + ) + tables_seen_data = rv_schema.data_tables(seen_data_only=True) + if tables_seen_data: + logger.warning( + f"Schema {name} in schema storage contains tables" + f" ({', '.join(t['name'] for t in tables_seen_data)}) that are present" + " in the destination. 
If you changed schema of those tables in import" + " schema, consider using one of the refresh options:" + " https://dlthub.com/devel/general-usage/pipeline#refresh-pipeline-data-and-state" + ) + rv_schema.replace_schema_content(i_s, link_to_replaced_schema=True) rv_schema._imported_version_hash = i_s.version_hash - logger.info( - f"Schema {name} was present in {self.storage.storage_path} but is" - f" overwritten with imported schema version {i_s.version} and" - f" imported hash {i_s.version_hash}" - ) + # if schema was imported, overwrite storage schema - self._save_schema(rv_schema) - if self.config.export_schema_path: - self._export_schema(rv_schema, self.config.export_schema_path) + self._save_and_export_schema(rv_schema, check_processing_hints=True) except FileNotFoundError: # no schema to import -> skip silently and return the original if storage_schema is None: @@ -204,7 +209,7 @@ def _export_schema( ) def _save_schema(self, schema: Schema) -> str: - # save a schema to schema store + """Saves schema to schema store and bumps the version""" schema_file = self._file_name_in_store(schema.name, "json") stored_schema = schema.to_dict() saved_path = self.storage.save(schema_file, to_pretty_json(stored_schema)) @@ -213,6 +218,30 @@ def _save_schema(self, schema: Schema) -> str: schema._bump_version() return saved_path + def _save_and_export_schema(self, schema: Schema, check_processing_hints: bool = False) -> str: + """Saves schema to schema store and then exports it. If the export path is the same as import + path, processing hints will be removed. + """ + saved_path = self._save_schema(schema) + if self.config.export_schema_path: + self._export_schema( + schema, + self.config.export_schema_path, + self.config.export_schema_path == self.config.import_schema_path, + ) + # if any processing hints are found we should warn the user + if check_processing_hints and (processing_hints := get_processing_hints(schema.tables)): + msg = ( + f"Imported schema {schema.name} contains processing hints for some tables." + " Processing hints are used by normalizer (x-normalizer) to mark tables that got" + " materialized and that prevents destructive changes to the schema. In most cases" + " import schema should not contain processing hints because it is mostly used to" + " initialize tables in a new dataset. 
" + ) + msg += "Affected tables are: " + ", ".join(processing_hints.keys()) + logger.warning(msg) + return saved_path + @staticmethod def load_schema_file( path: str, diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py index 25427af105..091e876708 100644 --- a/tests/common/storages/test_schema_storage.py +++ b/tests/common/storages/test_schema_storage.py @@ -7,6 +7,7 @@ from dlt.common.normalizers.utils import explicit_normalizers from dlt.common.schema.schema import Schema from dlt.common.schema.typing import TStoredSchema +from dlt.common.schema.utils import remove_processing_hints from dlt.common.storages.exceptions import ( InStorageSchemaModified, SchemaNotFoundError, @@ -117,9 +118,11 @@ def test_skip_import_if_not_modified(synced_storage: SchemaStorage, storage: Sch _, new_table = storage_schema.coerce_row("event_user", None, row) storage_schema.update_table(new_table) assert storage_schema.is_modified + print("SAVE SCHEMA") storage.save_schema(storage_schema) assert not storage_schema.is_modified # now use synced storage to load schema again + print("LOAD SCHEMA") reloaded_schema = synced_storage.load_schema("ethereum") # the schema was not overwritten assert "event_user" in reloaded_schema.tables @@ -267,10 +270,10 @@ def test_save_store_schema_over_import(ie_storage: SchemaStorage) -> None: ie_storage.save_schema(schema) assert schema.version_hash == schema_hash # we linked schema to import schema - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9() # load schema and make sure our new schema is here schema = ie_storage.load_schema("ethereum") - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9() assert schema._stored_version_hash == schema_hash assert schema.version_hash == schema_hash assert schema.previous_hashes == [] @@ -287,7 +290,7 @@ def test_save_store_schema_over_import_sync(synced_storage: SchemaStorage) -> No schema = Schema("ethereum") schema_hash = schema.version_hash synced_storage.save_schema(schema) - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9() # import schema is overwritten fs = FileStorage(synced_storage.config.import_schema_path) exported_name = synced_storage._file_name_in_store("ethereum", "yaml") @@ -496,22 +499,23 @@ def test_new_live_schema_committed(live_storage: LiveSchemaStorage) -> None: # assert schema.settings["schema_sealed"] is True -def prepare_import_folder(storage: SchemaStorage) -> None: - shutil.copy( - yml_case_path("schemas/eth/ethereum_schema_v8"), - os.path.join(storage.storage.storage_path, "../import/ethereum.schema.yaml"), - ) +def prepare_import_folder(storage: SchemaStorage) -> Schema: + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + # remove processing hints before installing as import schema + # ethereum schema is a "dirty" schema with processing hints + eth = Schema.from_dict(eth_V9, remove_processing_hints=True) + storage._export_schema(eth, os.path.join(storage.storage.storage_path, "../import/")) + return eth def assert_schema_imported(synced_storage: SchemaStorage, storage: SchemaStorage) -> Schema: prepare_import_folder(synced_storage) - eth_V9: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v9") schema = synced_storage.load_schema("ethereum") # is linked to imported schema - 
schema._imported_version_hash = eth_V9["version_hash"] + schema._imported_version_hash = IMPORTED_VERSION_HASH_ETH_V9() # also was saved in storage assert synced_storage.has_schema("ethereum") - # and has link to imported schema s well (load without import) + # and has link to imported schema as well (load without import) schema = storage.load_schema("ethereum") - assert schema._imported_version_hash == eth_V9["version_hash"] + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9() return schema diff --git a/tests/common/utils.py b/tests/common/utils.py index a234937e56..32741128b8 100644 --- a/tests/common/utils.py +++ b/tests/common/utils.py @@ -9,14 +9,24 @@ from dlt.common import json from dlt.common.typing import StrAny -from dlt.common.schema import utils +from dlt.common.schema import utils, Schema from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.configuration.providers import environ as environ_provider COMMON_TEST_CASES_PATH = "./tests/common/cases/" -# for import schema tests, change when upgrading the schema version -IMPORTED_VERSION_HASH_ETH_V9 = "PgEHvn5+BHV1jNzNYpx9aDpq6Pq1PSSetufj/h0hKg4=" + + +def IMPORTED_VERSION_HASH_ETH_V9() -> str: + # for import schema tests, change when upgrading the schema version + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + assert eth_V9["version_hash"] == "PgEHvn5+BHV1jNzNYpx9aDpq6Pq1PSSetufj/h0hKg4=" + # remove processing hints before installing as import schema + # ethereum schema is a "dirty" schema with processing hints + eth = Schema.from_dict(eth_V9, remove_processing_hints=True) + return eth.stored_version_hash + + # test sentry DSN TEST_SENTRY_DSN = ( "https://797678dd0af64b96937435326c7d30c1@o1061158.ingest.sentry.io/4504306172821504" diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index b706cedb1c..e554824dd7 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -42,7 +42,7 @@ ) from dlt.extract.items import TableNameMeta -from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V9, load_yml_case +from tests.common.utils import load_yml_case def test_default_resource() -> None: From 4b72b77bbcabf55dc779eb0d0146e7558d6f8b50 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 12 Jun 2024 22:02:45 +0200 Subject: [PATCH 047/105] allows to subclass INFO SCHEMA query generation and uses specialized big query override --- dlt/destinations/impl/bigquery/__init__.py | 1 + dlt/destinations/impl/bigquery/bigquery.py | 22 +++++++++- dlt/destinations/job_client_impl.py | 47 +++++++++++++++------- tests/load/test_job_client.py | 5 --- 4 files changed, 53 insertions(+), 22 deletions(-) diff --git a/dlt/destinations/impl/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py index 60b8c0bfb6..7a4c2bb637 100644 --- a/dlt/destinations/impl/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -14,6 +14,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.has_case_sensitive_identifiers = ( True # there are case insensitive identifiers but dlt does not use them ) + caps.casefold_identifier = str # BQ limit is 4GB but leave a large headroom since buffered writer does not preemptively check size caps.recommended_file_size = int(1024 * 1024 * 1024) caps.format_datetime_literal = format_bigquery_datetime_literal diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index c2607202bc..3cdfc793a6 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py 
+++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -1,11 +1,10 @@ import functools import os from pathlib import Path -from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Type, cast +from typing import Any, Dict, List, Optional, Sequence, Tuple, cast import google.cloud.bigquery as bigquery # noqa: I250 from google.api_core import exceptions as api_core_exceptions -from google.cloud import exceptions as gcp_exceptions from google.api_core import retry from google.cloud.bigquery.retry import _RETRYABLE_REASONS @@ -371,6 +370,25 @@ def prepare_load_table( ) return table + def _get_info_schema_columns_query( + self, catalog_name: Optional[str], schema_name: str, folded_table_names: List[str] + ) -> Tuple[str, List[Any]]: + """Bigquery needs to scope the INFORMATION_SCHEMA.COLUMNS with project and dataset name so standard query generator cannot be used.""" + # escape schema and catalog names + catalog_name = self.capabilities.escape_identifier(catalog_name) + schema_name = self.capabilities.escape_identifier(schema_name) + + query = f""" +SELECT {",".join(self._get_storage_table_query_columns())} + FROM {catalog_name}.{schema_name}.INFORMATION_SCHEMA.COLUMNS +WHERE """ + + # placeholder for each table + table_placeholders = ",".join(["%s"] * len(folded_table_names)) + query += f"table_name IN ({table_placeholders}) ORDER BY table_name, ordinal_position;" + + return query, folded_table_names + def _get_column_def_sql(self, column: TColumnSchema, table_format: TTableFormat = None) -> str: name = self.sql_client.escape_column_name(column["name"]) column_def_sql = ( diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index c87cc2969f..5f89857871 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -321,22 +321,10 @@ def get_storage_tables( f"One or more of tables in {table_names} after applying" f" {self.capabilities.casefold_identifier} produced a clashing name." ) - query = f""" -SELECT {",".join(self._get_storage_table_query_columns())} - FROM INFORMATION_SCHEMA.COLUMNS -WHERE """ - db_params = [] - if catalog_name: - db_params.append(catalog_name) - query += "table_catalog = %s AND " - db_params.append(schema_name) - db_params = db_params + list(name_lookup.keys()) - # placeholder for each table - table_placeholders = ",".join(["%s"] * len(table_names)) - query += ( - f"table_schema = %s AND table_name IN ({table_placeholders}) ORDER BY table_name," - " ordinal_position;" + # rows = self.sql_client.execute_sql(query, *db_params) + query, db_params = self._get_info_schema_columns_query( + catalog_name, schema_name, folded_table_names ) rows = self.sql_client.execute_sql(query, *db_params) prev_table: str = None @@ -430,6 +418,35 @@ def get_stored_schema_by_hash(self, version_hash: str) -> StorageSchemaInfo: ) return self._row_to_schema_info(query, version_hash) + def _get_info_schema_columns_query( + self, catalog_name: Optional[str], schema_name: str, folded_table_names: List[str] + ) -> Tuple[str, List[Any]]: + """Generates SQL to query INFORMATION_SCHEMA.COLUMNS for a set of tables in `folded_table_names`. Input identifiers must be already + in a form that can be passed to a query via db_params. `catalogue_name` is optional and when None, the part of query selecting it + is skipped. 
+ + Returns: query and list of db_params tuple + """ + query = f""" +SELECT {",".join(self._get_storage_table_query_columns())} + FROM INFORMATION_SCHEMA.COLUMNS +WHERE """ + + db_params = [] + if catalog_name: + db_params.append(catalog_name) + query += "table_catalog = %s AND " + db_params.append(schema_name) + db_params = db_params + folded_table_names + # placeholder for each table + table_placeholders = ",".join(["%s"] * len(folded_table_names)) + query += ( + f"table_schema = %s AND table_name IN ({table_placeholders}) ORDER BY table_name," + " ordinal_position;" + ) + + return query, db_params + def _get_storage_table_query_columns(self) -> List[str]: """Column names used when querying table from information schema. Override for databases that use different namings. diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index a601cd234c..f27cb52788 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -45,11 +45,6 @@ from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -# @pytest.fixture(autouse=True) -# def set_environ(): -# os.environ["SCHEMA__NAMING"] = "sql_upper" - - @pytest.fixture def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) From ab39e06132e41c38d1b7e05136d274ca5a6cd57f Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 12 Jun 2024 22:03:19 +0200 Subject: [PATCH 048/105] uses correct schema escaping function in sql jobs --- dlt/destinations/sql_jobs.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 7168e5d815..c5b1c72df2 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -361,10 +361,8 @@ def gen_merge_sql( sql: List[str] = [] root_table = table_chain[0] - escape_id = sql_client.capabilities.escape_identifier + escape_column_id = sql_client.escape_column_name escape_lit = sql_client.capabilities.escape_literal - if escape_id is None: - escape_id = DestinationCapabilitiesContext.generic_capabilities().escape_identifier if escape_lit is None: escape_lit = DestinationCapabilitiesContext.generic_capabilities().escape_literal @@ -376,13 +374,13 @@ def gen_merge_sql( # get merge and primary keys from top level primary_keys = list( map( - escape_id, + escape_column_id, get_columns_names_with_prop(root_table, "primary_key"), ) ) merge_keys = list( map( - escape_id, + escape_column_id, get_columns_names_with_prop(root_table, "merge_key"), ) ) @@ -419,7 +417,7 @@ def gen_merge_sql( f" {root_table['name']} so it is not possible to link child tables to it.", ) # get first unique column - unique_column = escape_id(unique_columns[0]) + unique_column = escape_column_id(unique_columns[0]) # create temp table with unique identifier create_delete_temp_table_sql, delete_temp_table_name = ( cls.gen_delete_temp_table_sql( @@ -442,7 +440,7 @@ def gen_merge_sql( f" {table['name']} so it is not possible to refer to top level table" f" {root_table['name']} unique column {unique_column}", ) - root_key_column = escape_id(root_key_columns[0]) + root_key_column = escape_column_id(root_key_columns[0]) sql.append( cls.gen_delete_from_sql( table_name, root_key_column, delete_temp_table_name, unique_column @@ -461,10 +459,10 @@ def gen_merge_sql( hard_delete_col = get_first_column_name_with_prop(root_table, "hard_delete") if hard_delete_col is not None: # any value indicates a delete for non-boolean columns - 
not_deleted_cond = f"{escape_id(hard_delete_col)} IS NULL" + not_deleted_cond = f"{escape_column_id(hard_delete_col)} IS NULL" if root_table["columns"][hard_delete_col]["data_type"] == "bool": # only True values indicate a delete for boolean columns - not_deleted_cond += f" OR {escape_id(hard_delete_col)} = {escape_lit(False)}" + not_deleted_cond += f" OR {escape_column_id(hard_delete_col)} = {escape_lit(False)}" # get dedup sort information dedup_sort = get_dedup_sort_tuple(root_table) @@ -503,7 +501,7 @@ def gen_merge_sql( uniq_column = unique_column if table.get("parent") is None else root_key_column insert_cond = f"{uniq_column} IN (SELECT * FROM {insert_temp_table_name})" - columns = list(map(escape_id, get_columns_names_with_prop(table, "name"))) + columns = list(map(escape_column_id, get_columns_names_with_prop(table, "name"))) col_str = ", ".join(columns) select_sql = f"SELECT {col_str} FROM {staging_table_name} WHERE {insert_cond}" if len(primary_keys) > 0 and len(table_chain) == 1: @@ -534,9 +532,11 @@ def gen_scd2_sql( # get column names caps = sql_client.capabilities - escape_id = caps.escape_identifier - from_, to = list(map(escape_id, get_validity_column_names(root_table))) # validity columns - hash_ = escape_id( + escape_column_id = sql_client.escape_column_name + from_, to = list( + map(escape_column_id, get_validity_column_names(root_table)) + ) # validity columns + hash_ = escape_column_id( get_first_column_name_with_prop(root_table, "x-row-version") ) # row hash column @@ -568,7 +568,7 @@ def gen_scd2_sql( """) # insert new active records in root table - columns = map(escape_id, list(root_table["columns"].keys())) + columns = map(escape_column_id, list(root_table["columns"].keys())) col_str = ", ".join([c for c in columns if c not in (from_, to)]) sql.append(f""" INSERT INTO {root_table_name} ({col_str}, {from_}, {to}) @@ -592,7 +592,7 @@ def gen_scd2_sql( " it is not possible to link child tables to it.", ) # get first unique column - unique_column = escape_id(unique_columns[0]) + unique_column = escape_column_id(unique_columns[0]) # TODO: - based on deterministic child hashes (OK) # - if row hash changes all is right # - if it does not we only capture new records, while we should replace existing with those in stage From 2ae3ad28917d72f2514d79dfc6593b93e4363f57 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:11:17 +0200 Subject: [PATCH 049/105] passes pipeline state to package state via extract --- dlt/common/destination/reference.py | 2 +- dlt/common/pipeline.py | 3 --- dlt/pipeline/state_sync.py | 4 ++-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index d83729ea36..1d97dd20f0 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -68,7 +68,7 @@ class StateInfo(NamedTuple): pipeline_name: str state: str created_at: datetime.datetime - dlt_load_id: str = None + dlt_load_id: str @configspec diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index 6cefdd9e6c..c6ee27e58b 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -260,9 +260,6 @@ def asstr(self, verbosity: int = 0) -> str: return self._load_packages_asstr(self.load_packages, verbosity) -# reveal_type(ExtractInfo) - - class NormalizeMetrics(StepMetrics): job_metrics: Dict[str, DataWriterMetrics] """Metrics collected per job id during writing of job file""" diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index 
980dc1ebe4..d0164946f7 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -98,8 +98,8 @@ def state_doc(state: TPipelineState, load_id: str = None) -> TPipelineStateDoc: return doc -def state_resource(state: TPipelineState) -> Tuple[DltResource, TPipelineStateDoc]: - doc = state_doc(state) +def state_resource(state: TPipelineState, load_id: str) -> Tuple[DltResource, TPipelineStateDoc]: + doc = state_doc(state, load_id) return ( dlt.resource( [doc], From 09b7731b8b598dbedf3adfd9a497d715d2e271de Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:12:07 +0200 Subject: [PATCH 050/105] fixes optional normalizers module --- dlt/common/normalizers/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index 00b6278087..24f70cc327 100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -42,9 +42,8 @@ def import_normalizers( """ # add defaults to normalizer_config normalizers_config["names"] = names = normalizers_config["names"] or "snake_case" - normalizers_config["json"] = item_normalizer = normalizers_config.get("json") or { - "module": "dlt.common.normalizers.json.relational" - } + normalizers_config["json"] = item_normalizer = normalizers_config.get("json") or {} + item_normalizer.setdefault("module", "dlt.common.normalizers.json.relational") json_module = cast(SupportsDataItemNormalizer, import_module(item_normalizer["module"])) return ( From cfd3e5f928fa52e332a75f7527d10371c4888cf4 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:14:49 +0200 Subject: [PATCH 051/105] excludes version_hash from pipeline state SELECT --- dlt/destinations/job_client_impl.py | 13 +++++++++---- tests/pipeline/test_dlt_versions.py | 12 +++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 5f89857871..3d384b2c28 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -29,7 +29,6 @@ from dlt.common.schema.utils import ( loads_table, normalize_table_identifiers, - pipeline_state_table, version_table, ) from dlt.common.storages import FileStorage @@ -55,7 +54,11 @@ from dlt.destinations.sql_jobs import SqlMergeJob, SqlStagingCopyJob from dlt.destinations.typing import TNativeConn from dlt.destinations.sql_client import SqlClientBase -from dlt.destinations.utils import info_schema_null_to_bool, verify_sql_job_client_schema +from dlt.destinations.utils import ( + get_pipeline_state_query_columns, + info_schema_null_to_bool, + verify_sql_job_client_schema, +) # this should suffice for now DDL_COMMANDS = ["ALTER", "CREATE", "DROP"] @@ -145,7 +148,9 @@ def __init__( self.loads_table_schema_columns = ", ".join( sql_client.escape_column_name(col) for col in loads_table_["columns"] ) - state_table_ = normalize_table_identifiers(pipeline_state_table(), schema.naming) + state_table_ = normalize_table_identifiers( + get_pipeline_state_query_columns(), schema.naming + ) self.state_table_columns = ", ".join( sql_client.escape_column_name(col) for col in state_table_["columns"] ) @@ -402,7 +407,7 @@ def get_stored_state(self, pipeline_name: str) -> StateInfo: row = cur.fetchone() if not row: return None - return StateInfo(row[0], row[1], row[2], row[3], pendulum.instance(row[4])) + return StateInfo(row[0], row[1], row[2], row[3], pendulum.instance(row[4]), row[5]) def _norm_and_escape_columns(self, *columns: 
str) -> Iterator[str]: return map( diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 26eae36dd6..ae424babca 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -1,3 +1,4 @@ +from subprocess import CalledProcessError import sys import pytest import tempfile @@ -103,7 +104,16 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: # execute in current version venv = Venv.restore_current() # load all issues - print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_pipeline.py")) + try: + print( + venv.run_script( + "../tests/pipeline/cases/github_pipeline/github_pipeline.py" + ) + ) + except CalledProcessError as cpe: + print(f"script stdout: {cpe.stdout}") + print(f"script stderr: {cpe.stderr}") + raise # hash hash in schema github_schema = json.loads( test_storage.load( From 0edbbfd5ce9a4070cde3e0a59ccd3f5ac818ebaa Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:15:44 +0200 Subject: [PATCH 052/105] passes pipeline state to package state pt.2 --- dlt/extract/extract.py | 16 +++++++++------- dlt/pipeline/pipeline.py | 24 ++++++++++++++++-------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index f8966c3ced..5769be1a8d 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -170,6 +170,9 @@ def add_item(item: Any) -> bool: class Extract(WithStepInfo[ExtractMetrics, ExtractInfo]): + original_data: Any + """Original data from which the extracted DltSource was created. Will be used to describe in extract info""" + def __init__( self, schema_storage: SchemaStorage, @@ -181,6 +184,7 @@ def __init__( self.collector = collector self.schema_storage = schema_storage self.extract_storage = ExtractStorage(normalize_storage_config) + # TODO: this should be passed together with DltSource to extract() self.original_data: Any = original_data super().__init__() @@ -370,7 +374,9 @@ def extract( load_package_state_update: Optional[Dict[str, Any]] = None, ) -> str: # generate load package to be able to commit all the sources together later - load_id = self.extract_storage.create_load_package(source.discover_schema()) + load_id = self.extract_storage.create_load_package( + source.discover_schema(), reuse_exiting_package=True + ) with Container().injectable_context( SourceSchemaInjectableContext(source.schema) ), Container().injectable_context( @@ -405,14 +411,10 @@ def extract( commit_load_package_state() return load_id - def commit_packages(self, pipline_state_doc: TPipelineStateDoc = None) -> None: - """Commits all extracted packages to normalize storage, and adds the pipeline state to the load package""" + def commit_packages(self) -> None: + """Commits all extracted packages to normalize storage""" # commit load packages for load_id, metrics in self._load_id_metrics.items(): - if pipline_state_doc: - package_state = self.extract_storage.new_packages.get_load_package_state(load_id) - package_state["pipeline_state"] = {**pipline_state_doc, "dlt_load_id": load_id} - self.extract_storage.new_packages.save_load_package_state(load_id, package_state) self.extract_storage.commit_new_load_package( load_id, self.schema_storage[metrics[0]["schema_name"]] ) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index b399164ea0..fd52ffb359 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -448,14 +448,13 @@ def extract( refresh=refresh or self.refresh, ) # extract state - 
state: TPipelineStateDoc = None if self.config.restore_from_destination: # this will update state version hash so it will not be extracted again by with_state_sync - state = self._bump_version_and_extract_state( + self._bump_version_and_extract_state( self._container[StateInjectableContext].state, True, extract_step ) # commit load packages with state - extract_step.commit_packages(state) + extract_step.commit_packages() return self._get_step_info(extract_step) except Exception as exc: # emit step info @@ -1604,13 +1603,22 @@ def _bump_version_and_extract_state( _, hash_, _ = bump_pipeline_state_version_if_modified(self._props_to_state(state)) should_extract = hash_ != state["_local"].get("_last_extracted_hash") if should_extract and extract_state: - data, doc = state_resource(state) - extract_ = extract or Extract( - self._schema_storage, self._normalize_storage_config(), original_data=data + extract_ = extract or Extract(self._schema_storage, self._normalize_storage_config()) + # create or get load package upfront to get load_id to create state doc + schema = schema or self.default_schema + # note that we preferably retrieve existing package for `schema` + # same thing happens in extract_.extract so the load_id is preserved + load_id = extract_.extract_storage.create_load_package( + schema, reuse_exiting_package=True ) + data, doc = state_resource(state, load_id) + extract_.original_data = data + # append pipeline state to package state + load_package_state_update = load_package_state_update or {} + load_package_state_update["pipeline_state"] = doc self._extract_source( extract_, - data_to_sources(data, self, schema or self.default_schema)[0], + data_to_sources(data, self, schema)[0], 1, 1, load_package_state_update=load_package_state_update, @@ -1619,7 +1627,7 @@ def _bump_version_and_extract_state( mark_state_extracted(state, hash_) # commit only if we created storage if not extract: - extract_.commit_packages(doc) + extract_.commit_packages() return doc return None From 5769ba1675de0dbafdaec58a7a84de5b43442c25 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:16:01 +0200 Subject: [PATCH 053/105] re-enables sentry tests --- tests/.dlt/config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/.dlt/config.toml b/tests/.dlt/config.toml index ec0df4fc15..ba86edf417 100644 --- a/tests/.dlt/config.toml +++ b/tests/.dlt/config.toml @@ -1,5 +1,5 @@ -# [runtime] -# sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" +[runtime] +sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" [tests] bucket_url_gs="gs://ci-test-bucket" From 1f17a44586e2c32583eac2f16c921c221194bd8b Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 00:16:59 +0200 Subject: [PATCH 054/105] bumps qdrant client, makes test running for local version --- dlt/destinations/impl/qdrant/configuration.py | 4 +- dlt/destinations/impl/qdrant/qdrant_client.py | 100 +++--- poetry.lock | 326 +++++++++++++----- pyproject.toml | 6 +- tests/load/qdrant/test_pipeline.py | 2 + tests/load/qdrant/utils.py | 25 +- 6 files changed, 316 insertions(+), 147 deletions(-) diff --git a/dlt/destinations/impl/qdrant/configuration.py b/dlt/destinations/impl/qdrant/configuration.py index fd11cc7dcb..4d1ed1234d 100644 --- a/dlt/destinations/impl/qdrant/configuration.py +++ b/dlt/destinations/impl/qdrant/configuration.py @@ -18,6 +18,8 @@ class QdrantCredentials(CredentialsConfiguration): location: 
Optional[str] = None # API key for authentication in Qdrant Cloud. Default: `None` api_key: Optional[str] = None + # Persistence path for QdrantLocal. Default: `None` + path: Optional[str] = None def __str__(self) -> str: return self.location or "localhost" @@ -44,7 +46,7 @@ class QdrantClientOptions(BaseConfiguration): # Default: `None` host: Optional[str] = None # Persistence path for QdrantLocal. Default: `None` - path: Optional[str] = None + # path: Optional[str] = None @configspec diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index 83fdb8001b..03233f7792 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -7,8 +7,9 @@ from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.utils import ( get_columns_names_with_prop, + loads_table, normalize_table_identifiers, - pipeline_state_table, + version_table, ) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync @@ -18,7 +19,7 @@ from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo -from dlt.destinations.impl.qdrant import capabilities +from dlt.destinations.utils import get_pipeline_state_query_columns from dlt.destinations.impl.qdrant.configuration import QdrantClientConfiguration from dlt.destinations.impl.qdrant.qdrant_adapter import VECTORIZE_HINT @@ -54,21 +55,24 @@ def __init__( if self.unique_identifiers else uuid.uuid4() ) - embedding_doc = self._get_embedding_doc(data) payloads.append(data) ids.append(point_id) - docs.append(embedding_doc) - - embedding_model = db_client._get_or_init_model(db_client.embedding_model_name) - embeddings = list( - embedding_model.embed( - docs, - batch_size=self.config.embedding_batch_size, - parallel=self.config.embedding_parallelism, + if len(self.embedding_fields) > 0: + docs.append(self._get_embedding_doc(data)) + + if len(self.embedding_fields) > 0: + embedding_model = db_client._get_or_init_model(db_client.embedding_model_name) + embeddings = list( + embedding_model.embed( + docs, + batch_size=self.config.embedding_batch_size, + parallel=self.config.embedding_parallelism, + ) ) - ) - vector_name = db_client.get_vector_field_name() - embeddings = [{vector_name: embedding.tolist()} for embedding in embeddings] + vector_name = db_client.get_vector_field_name() + embeddings = [{vector_name: embedding.tolist()} for embedding in embeddings] + else: + embeddings = [{}] * len(ids) assert len(embeddings) == len(payloads) == len(ids) self._upload_data(vectors=embeddings, ids=ids, payloads=payloads) @@ -153,21 +157,18 @@ def __init__( capabilities: DestinationCapabilitiesContext, ) -> None: super().__init__(schema, config, capabilities) - self.version_collection_properties = list( - schema.get_table_columns(schema.version_table_name).keys() - ) - self.loads_collection_properties = list( - schema.get_table_columns(schema.loads_table_name).keys() + # get definitions of the dlt tables, normalize column names and keep for later use + version_table_ = normalize_table_identifiers(version_table(), schema.naming) + self.version_collection_properties = list(version_table_["columns"].keys()) + loads_table_ = normalize_table_identifiers(loads_table(), schema.naming) + self.loads_collection_properties = list(loads_table_["columns"].keys()) + state_table_ = normalize_table_identifiers( + 
get_pipeline_state_query_columns(), schema.naming ) - # get definition of state table (may not be present in the schema) - state_table = schema.tables.get( - schema.state_table_name, - normalize_table_identifiers(pipeline_state_table(), schema.naming), - ) - # column names are pipeline properties - self.pipeline_state_properties = list(state_table["columns"].keys()) + self.pipeline_state_properties = list(state_table_["columns"].keys()) + self.config: QdrantClientConfiguration = config - self.db_client: QC = QdrantClient._create_db_client(config) + self.db_client: QC = None self.model = config.model @property @@ -229,6 +230,8 @@ def _create_collection(self, full_collection_name: str) -> None: self.db_client.create_collection( collection_name=full_collection_name, vectors_config=vectors_config ) + # TODO: we can use index hints to create indexes on properties or full text + # self.db_client.create_payload_index(full_collection_name, "_dlt_load_id", field_type="float") def _create_point_no_vector(self, obj: Dict[str, Any], collection_name: str) -> None: """Inserts a point into a Qdrant collection without a vector. @@ -326,14 +329,11 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: """ # normalize property names p_load_id = self.schema.naming.normalize_identifier("load_id") + p_dlt_load_id = self.schema.naming.normalize_identifier("_dlt_load_id") p_pipeline_name = self.schema.naming.normalize_identifier("pipeline_name") + # p_created_at = self.schema.naming.normalize_identifier("created_at") - # this works only because we create points that have no vectors - # with decreasing ids. so newest (lowest ids) go first - # TODO: this does not work because we look for state first and state has UUID4 - # TODO: look for 10 last load ids and find the state associated with them - - limit = 10 + limit = 100 offset = None while True: try: @@ -350,14 +350,20 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ) ] ), + # search by package load id which is guaranteed to increase over time + # order_by=models.OrderBy( + # key=p_created_at, + # # direction=models.Direction.DESC, + # ), limit=limit, offset=offset, ) + # print("state_r", state_records) if len(state_records) == 0: return None for state_record in state_records: state = state_record.payload - load_id = state["_dlt_load_id"] + load_id = state[p_dlt_load_id] scroll_table_name = self._make_qualified_collection_name( self.schema.loads_table_name ) @@ -373,7 +379,7 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ), ) if load_records.count > 0: - state["dlt_load_id"] = state.pop("_dlt_load_id") + state["dlt_load_id"] = state.pop(p_dlt_load_id) return StateInfo(**state) except Exception: return None @@ -385,6 +391,9 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: p_schema_name = self.schema.naming.normalize_identifier("schema_name") # this works only because we create points that have no vectors # with decreasing ids. 
so newest (lowest ids) go first + # we do not use order_by because it requires and index to be created + # and this behavior is different for local and cloud qdrant + # p_inserted_at = self.schema.naming.normalize_identifier("inserted_at") response = self.db_client.scroll( scroll_table_name, with_payload=True, @@ -397,6 +406,10 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: ] ), limit=1, + # order_by=models.OrderBy( + # key=p_inserted_at, + # direction=models.Direction.DESC, + # ) ) record = response[0][0].payload return StorageSchemaInfo(**record) @@ -437,13 +450,14 @@ def restore_file_load(self, file_path: str) -> LoadJob: return EmptyLoadJob.from_file_path(file_path, "completed") def complete_load(self, load_id: str) -> None: - values = [load_id, self.schema.name, 0, str(pendulum.now())] + values = [load_id, self.schema.name, 0, str(pendulum.now()), self.schema.version_hash] assert len(values) == len(self.loads_collection_properties) properties = {k: v for k, v in zip(self.loads_collection_properties, values)} loads_table_name = self._make_qualified_collection_name(self.schema.loads_table_name) self._create_point_no_vector(properties, loads_table_name) def __enter__(self) -> "QdrantClient": + self.db_client = QdrantClient._create_db_client(self.config) return self def __exit__( @@ -452,16 +466,18 @@ def __exit__( exc_val: BaseException, exc_tb: TracebackType, ) -> None: - pass + if self.db_client: + self.db_client.close() + self.db_client = None def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) values = [ - schema.stored_version_hash, - schema.name, schema.version, schema.ENGINE_VERSION, - str(pendulum.now()), + str(pendulum.now().isoformat()), + schema.name, + schema.stored_version_hash, schema_str, ] assert len(values) == len(self.version_collection_properties) @@ -488,6 +504,10 @@ def _collection_exists(self, table_name: str, qualify_table_name: bool = True) - ) self.db_client.get_collection(table_name) return True + except ValueError as e: + if "not found" in str(e): + return False + raise e except UnexpectedResponse as e: if e.status_code == 404: return False diff --git a/poetry.lock b/poetry.lock index 31c9fd08ce..aa0129cad1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2658,21 +2658,27 @@ test = ["pytest (>=6)"] [[package]] name = "fastembed" -version = "0.1.1" +version = "0.2.6" description = "Fast, light, accurate library built for retrieval embedding generation" optional = true -python-versions = ">=3.8.0,<3.12" +python-versions = "<3.13,>=3.8.0" files = [ - {file = "fastembed-0.1.1-py3-none-any.whl", hash = "sha256:131413ae52cd72f4c8cced7a675f8269dbfd1a852abade3c815e265114bcc05a"}, - {file = "fastembed-0.1.1.tar.gz", hash = "sha256:f7e524ee4f74bb8aad16be5b687d1f77f608d40e96e292c87881dc36baf8f4c7"}, + {file = "fastembed-0.2.6-py3-none-any.whl", hash = "sha256:3e18633291722087abebccccd7fcdffafef643cb22d203370d7fad4fa83c10fb"}, + {file = "fastembed-0.2.6.tar.gz", hash = "sha256:adaed5b46e19cc1bbe5f98f2b3ffecfc4d2a48d27512e28ff5bfe92a42649a66"}, ] [package.dependencies] -onnx = ">=1.11,<2.0" -onnxruntime = ">=1.15,<2.0" +huggingface-hub = ">=0.20,<0.21" +loguru = ">=0.7.2,<0.8.0" +numpy = [ + {version = ">=1.21", markers = "python_version < \"3.12\""}, + {version = ">=1.26", markers = "python_version >= \"3.12\""}, +] +onnx = ">=1.15.0,<2.0.0" +onnxruntime = ">=1.17.0,<2.0.0" requests = ">=2.31,<3.0" -tokenizers = ">=0.13,<0.14" -tqdm = ">=4.65,<5.0" +tokenizers = ">=0.15.1,<0.16.0" +tqdm = ">=4.66,<5.0" 
[[package]] name = "filelock" @@ -4082,6 +4088,38 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "huggingface-hub" +version = "0.20.3" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.20.3-py3-none-any.whl", hash = "sha256:d988ae4f00d3e307b0c80c6a05ca6dbb7edba8bba3079f74cda7d9c2e562a7b6"}, + {file = "huggingface_hub-0.20.3.tar.gz", hash = "sha256:94e7f8e074475fbc67d6a71957b678e1b4a74ff1b64a644fd6cbb83da962d05d"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=2023.5.0" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)"] +quality = ["mypy (==1.5.1)", "ruff (>=0.1.3)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] + [[package]] name = "humanfriendly" version = "10.0" @@ -4535,6 +4573,24 @@ sqlalchemy = ["sqlalchemy"] test = ["mock", "pytest", "pytest-cov (<2.6)"] zmq = ["pyzmq"] +[[package]] +name = "loguru" +version = "0.7.2" +description = "Python logging made (stupidly) simple" +optional = true +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, + {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov 
(==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] + [[package]] name = "lxml" version = "4.9.3" @@ -5586,35 +5642,36 @@ reference = ["Pillow", "google-re2"] [[package]] name = "onnxruntime" -version = "1.16.1" +version = "1.18.0" description = "ONNX Runtime is a runtime accelerator for Machine Learning models" optional = true python-versions = "*" files = [ - {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, - {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, - {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fa93b166f2d97063dc9f33c5118c5729a4a5dd5617296b6dbef42f9047b3e81"}, - {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:042dd9201b3016ee18f8f8bc4609baf11ff34ca1ff489c0a46bcd30919bf883d"}, - {file = "onnxruntime-1.16.1-cp310-cp310-win32.whl", hash = "sha256:c20aa0591f305012f1b21aad607ed96917c86ae7aede4a4dd95824b3d124ceb7"}, - {file = "onnxruntime-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:5581873e578917bea76d6434ee7337e28195d03488dcf72d161d08e9398c6249"}, - {file = "onnxruntime-1.16.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:ef8c0c8abf5f309aa1caf35941380839dc5f7a2fa53da533be4a3f254993f120"}, - {file = "onnxruntime-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e680380bea35a137cbc3efd67a17486e96972901192ad3026ee79c8d8fe264f7"}, - {file = "onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e62cc38ce1a669013d0a596d984762dc9c67c56f60ecfeee0d5ad36da5863f6"}, - {file = "onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:025c7a4d57bd2e63b8a0f84ad3df53e419e3df1cc72d63184f2aae807b17c13c"}, - {file = "onnxruntime-1.16.1-cp311-cp311-win32.whl", hash = "sha256:9ad074057fa8d028df248b5668514088cb0937b6ac5954073b7fb9b2891ffc8c"}, - {file = "onnxruntime-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:d5e43a3478bffc01f817ecf826de7b25a2ca1bca8547d70888594ab80a77ad24"}, - {file = "onnxruntime-1.16.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:3aef4d70b0930e29a8943eab248cd1565664458d3a62b2276bd11181f28fd0a3"}, - {file = "onnxruntime-1.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:55a7b843a57c8ca0c8ff169428137958146081d5d76f1a6dd444c4ffcd37c3c2"}, - {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c631af1941bf3b5f7d063d24c04aacce8cff0794e157c497e315e89ac5ad7b"}, - {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5671f296c3d5c233f601e97a10ab5a1dd8e65ba35c7b7b0c253332aba9dff330"}, - {file = "onnxruntime-1.16.1-cp38-cp38-win32.whl", hash = "sha256:eb3802305023dd05e16848d4e22b41f8147247894309c0c27122aaa08793b3d2"}, - {file = "onnxruntime-1.16.1-cp38-cp38-win_amd64.whl", hash = "sha256:fecfb07443d09d271b1487f401fbdf1ba0c829af6fd4fe8f6af25f71190e7eb9"}, - {file = "onnxruntime-1.16.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:de3e12094234db6545c67adbf801874b4eb91e9f299bda34c62967ef0050960f"}, - {file = "onnxruntime-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:ff723c2a5621b5e7103f3be84d5aae1e03a20621e72219dddceae81f65f240af"}, - {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a7fb3073aaf6b462e3d7fb433320f7700558a8892e5021780522dc4574292a"}, - {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:963159f1f699b0454cd72fcef3276c8a1aab9389a7b301bcd8e320fb9d9e8597"}, - {file = "onnxruntime-1.16.1-cp39-cp39-win32.whl", hash = "sha256:85771adb75190db9364b25ddec353ebf07635b83eb94b64ed014f1f6d57a3857"}, - {file = "onnxruntime-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:d32d2b30799c1f950123c60ae8390818381fd5f88bdf3627eeca10071c155dc5"}, + {file = "onnxruntime-1.18.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:5a3b7993a5ecf4a90f35542a4757e29b2d653da3efe06cdd3164b91167bbe10d"}, + {file = "onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15b944623b2cdfe7f7945690bfb71c10a4531b51997c8320b84e7b0bb59af902"}, + {file = "onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e61ce5005118064b1a0ed73ebe936bc773a102f067db34108ea6c64dd62a179"}, + {file = "onnxruntime-1.18.0-cp310-cp310-win32.whl", hash = "sha256:a4fc8a2a526eb442317d280610936a9f73deece06c7d5a91e51570860802b93f"}, + {file = "onnxruntime-1.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:71ed219b768cab004e5cd83e702590734f968679bf93aa488c1a7ffbe6e220c3"}, + {file = "onnxruntime-1.18.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:3d24bd623872a72a7fe2f51c103e20fcca2acfa35d48f2accd6be1ec8633d960"}, + {file = "onnxruntime-1.18.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f15e41ca9b307a12550bfd2ec93f88905d9fba12bab7e578f05138ad0ae10d7b"}, + {file = "onnxruntime-1.18.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f45ca2887f62a7b847d526965686b2923efa72538c89b7703c7b3fe970afd59"}, + {file = "onnxruntime-1.18.0-cp311-cp311-win32.whl", hash = "sha256:9e24d9ecc8781323d9e2eeda019b4b24babc4d624e7d53f61b1fe1a929b0511a"}, + {file = "onnxruntime-1.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:f8608398976ed18aef450d83777ff6f77d0b64eced1ed07a985e1a7db8ea3771"}, + {file = "onnxruntime-1.18.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f1d79941f15fc40b1ee67738b2ca26b23e0181bf0070b5fb2984f0988734698f"}, + {file = "onnxruntime-1.18.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99e8caf3a8565c853a22d323a3eebc2a81e3de7591981f085a4f74f7a60aab2d"}, + {file = "onnxruntime-1.18.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:498d2b8380635f5e6ebc50ec1b45f181588927280f32390fb910301d234f97b8"}, + {file = "onnxruntime-1.18.0-cp312-cp312-win32.whl", hash = "sha256:ba7cc0ce2798a386c082aaa6289ff7e9bedc3dee622eef10e74830cff200a72e"}, + {file = "onnxruntime-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:1fa175bd43f610465d5787ae06050c81f7ce09da2bf3e914eb282cb8eab363ef"}, + {file = "onnxruntime-1.18.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:0284c579c20ec8b1b472dd190290a040cc68b6caec790edb960f065d15cf164a"}, + {file = "onnxruntime-1.18.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d47353d036d8c380558a5643ea5f7964d9d259d31c86865bad9162c3e916d1f6"}, + {file = "onnxruntime-1.18.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:885509d2b9ba4b01f08f7fa28d31ee54b6477953451c7ccf124a84625f07c803"}, + 
{file = "onnxruntime-1.18.0-cp38-cp38-win32.whl", hash = "sha256:8614733de3695656411d71fc2f39333170df5da6c7efd6072a59962c0bc7055c"}, + {file = "onnxruntime-1.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:47af3f803752fce23ea790fd8d130a47b2b940629f03193f780818622e856e7a"}, + {file = "onnxruntime-1.18.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:9153eb2b4d5bbab764d0aea17adadffcfc18d89b957ad191b1c3650b9930c59f"}, + {file = "onnxruntime-1.18.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c7fd86eca727c989bb8d9c5104f3c45f7ee45f445cc75579ebe55d6b99dfd7c"}, + {file = "onnxruntime-1.18.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac67a4de9c1326c4d87bcbfb652c923039b8a2446bb28516219236bec3b494f5"}, + {file = "onnxruntime-1.18.0-cp39-cp39-win32.whl", hash = "sha256:6ffb445816d06497df7a6dd424b20e0b2c39639e01e7fe210e247b82d15a23b9"}, + {file = "onnxruntime-1.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:46de6031cb6745f33f7eca9e51ab73e8c66037fb7a3b6b4560887c5b55ab5d5d"}, ] [package.dependencies] @@ -5886,8 +5943,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -7266,30 +7323,30 @@ files = [ [[package]] name = "qdrant-client" -version = "1.6.4" +version = "1.9.1" description = "Client library for the Qdrant vector search engine" optional = true -python-versions = ">=3.8,<3.13" +python-versions = ">=3.8" files = [ - {file = "qdrant_client-1.6.4-py3-none-any.whl", hash = "sha256:db4696978d6a62d78ff60f70b912383f1e467bda3053f732b01ddb5f93281b10"}, - {file = "qdrant_client-1.6.4.tar.gz", hash = "sha256:bbd65f383b6a55a9ccf4e301250fa925179340dd90cfde9b93ce4230fd68867b"}, + {file = "qdrant_client-1.9.1-py3-none-any.whl", hash = "sha256:b9b7e0e5c1a51410d8bb5106a869a51e12f92ab45a99030f27aba790553bd2c8"}, + {file = "qdrant_client-1.9.1.tar.gz", hash = "sha256:186b9c31d95aefe8f2db84b7746402d7365bd63b305550e530e31bde2002ce79"}, ] [package.dependencies] -fastembed = {version = "0.1.1", optional = true, markers = "python_version < \"3.12\" and extra == \"fastembed\""} +fastembed = {version = "0.2.6", optional = true, markers = "python_version < \"3.13\" and extra == \"fastembed\""} grpcio = ">=1.41.0" grpcio-tools = ">=1.41.0" -httpx = {version = ">=0.14.0", extras = ["http2"]} +httpx = {version = ">=0.20.0", extras = ["http2"]} numpy = [ {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""}, {version = ">=1.26", markers = "python_version >= \"3.12\""}, ] portalocker = ">=2.7.0,<3.0.0" pydantic = ">=1.10.8" -urllib3 = ">=1.26.14,<2.0.0" +urllib3 = ">=1.26.14,<3" [package.extras] -fastembed = ["fastembed (==0.1.1)"] +fastembed = ["fastembed (==0.2.6)"] [[package]] name = "redshift-connector" @@ -8414,56 +8471,129 @@ twisted = ["twisted"] [[package]] name = "tokenizers" -version = "0.13.3" -description = "Fast and Customizable Tokenizers" +version = "0.15.2" +description = "" optional = true -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, - {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, - {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, - {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, - {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, - {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, - {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = 
"sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, - {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, - {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, - {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, - {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, - {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, -] - -[package.extras] -dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] -docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] + {file = "tokenizers-0.15.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:52f6130c9cbf70544287575a985bf44ae1bda2da7e8c24e97716080593638012"}, + {file = "tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:054c1cc9c6d68f7ffa4e810b3d5131e0ba511b6e4be34157aa08ee54c2f8d9ee"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9b9b070fdad06e347563b88c278995735292ded1132f8657084989a4c84a6d5"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea621a7eef4b70e1f7a4e84dd989ae3f0eeb50fc8690254eacc08acb623e82f1"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cf7fd9a5141634fa3aa8d6b7be362e6ae1b4cda60da81388fa533e0b552c98fd"}, + {file = 
"tokenizers-0.15.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44f2a832cd0825295f7179eaf173381dc45230f9227ec4b44378322d900447c9"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b9ec69247a23747669ec4b0ca10f8e3dfb3545d550258129bd62291aabe8605"}, + {file = "tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b6a4c78da863ff26dbd5ad9a8ecc33d8a8d97b535172601cf00aee9d7ce9ce"}, + {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5ab2a4d21dcf76af60e05af8063138849eb1d6553a0d059f6534357bce8ba364"}, + {file = "tokenizers-0.15.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a47acfac7e511f6bbfcf2d3fb8c26979c780a91e06fb5b9a43831b2c0153d024"}, + {file = "tokenizers-0.15.2-cp310-none-win32.whl", hash = "sha256:064ff87bb6acdbd693666de9a4b692add41308a2c0ec0770d6385737117215f2"}, + {file = "tokenizers-0.15.2-cp310-none-win_amd64.whl", hash = "sha256:3b919afe4df7eb6ac7cafd2bd14fb507d3f408db7a68c43117f579c984a73843"}, + {file = "tokenizers-0.15.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:89cd1cb93e4b12ff39bb2d626ad77e35209de9309a71e4d3d4672667b4b256e7"}, + {file = "tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cfed5c64e5be23d7ee0f0e98081a25c2a46b0b77ce99a4f0605b1ec43dd481fa"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a907d76dcfda37023ba203ab4ceeb21bc5683436ebefbd895a0841fd52f6f6f2"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20ea60479de6fc7b8ae756b4b097572372d7e4032e2521c1bbf3d90c90a99ff0"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:48e2b9335be2bc0171df9281385c2ed06a15f5cf121c44094338306ab7b33f2c"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:112a1dd436d2cc06e6ffdc0b06d55ac019a35a63afd26475205cb4b1bf0bfbff"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4620cca5c2817177ee8706f860364cc3a8845bc1e291aaf661fb899e5d1c45b0"}, + {file = "tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccd73a82751c523b3fc31ff8194702e4af4db21dc20e55b30ecc2079c5d43cb7"}, + {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:107089f135b4ae7817affe6264f8c7a5c5b4fd9a90f9439ed495f54fcea56fb4"}, + {file = "tokenizers-0.15.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0ff110ecc57b7aa4a594396525a3451ad70988e517237fe91c540997c4e50e29"}, + {file = "tokenizers-0.15.2-cp311-none-win32.whl", hash = "sha256:6d76f00f5c32da36c61f41c58346a4fa7f0a61be02f4301fd30ad59834977cc3"}, + {file = "tokenizers-0.15.2-cp311-none-win_amd64.whl", hash = "sha256:cc90102ed17271cf0a1262babe5939e0134b3890345d11a19c3145184b706055"}, + {file = "tokenizers-0.15.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f86593c18d2e6248e72fb91c77d413a815153b8ea4e31f7cd443bdf28e467670"}, + {file = "tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0774bccc6608eca23eb9d620196687c8b2360624619623cf4ba9dc9bd53e8b51"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d0222c5b7c9b26c0b4822a82f6a7011de0a9d3060e1da176f66274b70f846b98"}, + {file = 
"tokenizers-0.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3835738be1de66624fff2f4f6f6684775da4e9c00bde053be7564cbf3545cc66"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0143e7d9dcd811855c1ce1ab9bf5d96d29bf5e528fd6c7824d0465741e8c10fd"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db35825f6d54215f6b6009a7ff3eedee0848c99a6271c870d2826fbbedf31a38"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f5e64b0389a2be47091d8cc53c87859783b837ea1a06edd9d8e04004df55a5c"}, + {file = "tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e0480c452217edd35eca56fafe2029fb4d368b7c0475f8dfa3c5c9c400a7456"}, + {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a33ab881c8fe70474980577e033d0bc9a27b7ab8272896e500708b212995d834"}, + {file = "tokenizers-0.15.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a308a607ca9de2c64c1b9ba79ec9a403969715a1b8ba5f998a676826f1a7039d"}, + {file = "tokenizers-0.15.2-cp312-none-win32.whl", hash = "sha256:b8fcfa81bcb9447df582c5bc96a031e6df4da2a774b8080d4f02c0c16b42be0b"}, + {file = "tokenizers-0.15.2-cp312-none-win_amd64.whl", hash = "sha256:38d7ab43c6825abfc0b661d95f39c7f8af2449364f01d331f3b51c94dcff7221"}, + {file = "tokenizers-0.15.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:38bfb0204ff3246ca4d5e726e8cc8403bfc931090151e6eede54d0e0cf162ef0"}, + {file = "tokenizers-0.15.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c861d35e8286a53e06e9e28d030b5a05bcbf5ac9d7229e561e53c352a85b1fc"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:936bf3842db5b2048eaa53dade907b1160f318e7c90c74bfab86f1e47720bdd6"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:620beacc3373277700d0e27718aa8b25f7b383eb8001fba94ee00aeea1459d89"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2735ecbbf37e52db4ea970e539fd2d450d213517b77745114f92867f3fc246eb"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:473c83c5e2359bb81b0b6fde870b41b2764fcdd36d997485e07e72cc3a62264a"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968fa1fb3c27398b28a4eca1cbd1e19355c4d3a6007f7398d48826bbe3a0f728"}, + {file = "tokenizers-0.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:865c60ae6eaebdde7da66191ee9b7db52e542ed8ee9d2c653b6d190a9351b980"}, + {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7c0d8b52664ab2d4a8d6686eb5effc68b78608a9008f086a122a7b2996befbab"}, + {file = "tokenizers-0.15.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f33dfbdec3784093a9aebb3680d1f91336c56d86cc70ddf88708251da1fe9064"}, + {file = "tokenizers-0.15.2-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d44ba80988ff9424e33e0a49445072ac7029d8c0e1601ad25a0ca5f41ed0c1d6"}, + {file = "tokenizers-0.15.2-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:dce74266919b892f82b1b86025a613956ea0ea62a4843d4c4237be2c5498ed3a"}, + {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0ef06b9707baeb98b316577acb04f4852239d856b93e9ec3a299622f6084e4be"}, + {file = 
"tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c73e2e74bbb07910da0d37c326869f34113137b23eadad3fc00856e6b3d9930c"}, + {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4eeb12daf02a59e29f578a865f55d87cd103ce62bd8a3a5874f8fdeaa82e336b"}, + {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ba9f6895af58487ca4f54e8a664a322f16c26bbb442effd01087eba391a719e"}, + {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccec77aa7150e38eec6878a493bf8c263ff1fa8a62404e16c6203c64c1f16a26"}, + {file = "tokenizers-0.15.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3f40604f5042ff210ba82743dda2b6aa3e55aa12df4e9f2378ee01a17e2855e"}, + {file = "tokenizers-0.15.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5645938a42d78c4885086767c70923abad047163d809c16da75d6b290cb30bbe"}, + {file = "tokenizers-0.15.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:05a77cbfebe28a61ab5c3891f9939cc24798b63fa236d84e5f29f3a85a200c00"}, + {file = "tokenizers-0.15.2-cp37-none-win32.whl", hash = "sha256:361abdc068e8afe9c5b818769a48624687fb6aaed49636ee39bec4e95e1a215b"}, + {file = "tokenizers-0.15.2-cp37-none-win_amd64.whl", hash = "sha256:7ef789f83eb0f9baeb4d09a86cd639c0a5518528f9992f38b28e819df397eb06"}, + {file = "tokenizers-0.15.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4fe1f74a902bee74a3b25aff180fbfbf4f8b444ab37c4d496af7afd13a784ed2"}, + {file = "tokenizers-0.15.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4b89038a684f40a6b15d6b09f49650ac64d951ad0f2a3ea9169687bbf2a8ba"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d05a1b06f986d41aed5f2de464c003004b2df8aaf66f2b7628254bcbfb72a438"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508711a108684111ec8af89d3a9e9e08755247eda27d0ba5e3c50e9da1600f6d"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:daa348f02d15160cb35439098ac96e3a53bacf35885072611cd9e5be7d333daa"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:494fdbe5932d3416de2a85fc2470b797e6f3226c12845cadf054dd906afd0442"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2d60f5246f4da9373f75ff18d64c69cbf60c3bca597290cea01059c336d2470"}, + {file = "tokenizers-0.15.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93268e788825f52de4c7bdcb6ebc1fcd4a5442c02e730faa9b6b08f23ead0e24"}, + {file = "tokenizers-0.15.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6fc7083ab404019fc9acafe78662c192673c1e696bd598d16dc005bd663a5cf9"}, + {file = "tokenizers-0.15.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e39b41e5531d6b2122a77532dbea60e171ef87a3820b5a3888daa847df4153"}, + {file = "tokenizers-0.15.2-cp38-none-win32.whl", hash = "sha256:06cd0487b1cbfabefb2cc52fbd6b1f8d4c37799bd6c6e1641281adaa6b2504a7"}, + {file = "tokenizers-0.15.2-cp38-none-win_amd64.whl", hash = "sha256:5179c271aa5de9c71712e31cb5a79e436ecd0d7532a408fa42a8dbfa4bc23fd9"}, + {file = "tokenizers-0.15.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82f8652a74cc107052328b87ea8b34291c0f55b96d8fb261b3880216a9f9e48e"}, + {file = "tokenizers-0.15.2-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:02458bee6f5f3139f1ebbb6d042b283af712c0981f5bc50edf771d6b762d5e4f"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c9a09cd26cca2e1c349f91aa665309ddb48d71636370749414fbf67bc83c5343"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:158be8ea8554e5ed69acc1ce3fbb23a06060bd4bbb09029431ad6b9a466a7121"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ddba9a2b0c8c81633eca0bb2e1aa5b3a15362b1277f1ae64176d0f6eba78ab1"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ef5dd1d39797044642dbe53eb2bc56435308432e9c7907728da74c69ee2adca"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:454c203164e07a860dbeb3b1f4a733be52b0edbb4dd2e5bd75023ffa8b49403a"}, + {file = "tokenizers-0.15.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cf6b7f1d4dc59af960e6ffdc4faffe6460bbfa8dce27a58bf75755ffdb2526d"}, + {file = "tokenizers-0.15.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2ef09bbc16519f6c25d0c7fc0c6a33a6f62923e263c9d7cca4e58b8c61572afb"}, + {file = "tokenizers-0.15.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c9a2ebdd2ad4ec7a68e7615086e633857c85e2f18025bd05d2a4399e6c5f7169"}, + {file = "tokenizers-0.15.2-cp39-none-win32.whl", hash = "sha256:918fbb0eab96fe08e72a8c2b5461e9cce95585d82a58688e7f01c2bd546c79d0"}, + {file = "tokenizers-0.15.2-cp39-none-win_amd64.whl", hash = "sha256:524e60da0135e106b254bd71f0659be9f89d83f006ea9093ce4d1fab498c6d0d"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6a9b648a58281c4672212fab04e60648fde574877d0139cd4b4f93fe28ca8944"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7c7d18b733be6bbca8a55084027f7be428c947ddf871c500ee603e375013ffba"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:13ca3611de8d9ddfbc4dc39ef54ab1d2d4aaa114ac8727dfdc6a6ec4be017378"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:237d1bf3361cf2e6463e6c140628e6406766e8b27274f5fcc62c747ae3c6f094"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67a0fe1e49e60c664915e9fb6b0cb19bac082ab1f309188230e4b2920230edb3"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4e022fe65e99230b8fd89ebdfea138c24421f91c1a4f4781a8f5016fd5cdfb4d"}, + {file = "tokenizers-0.15.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d857be2df69763362ac699f8b251a8cd3fac9d21893de129bc788f8baaef2693"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:708bb3e4283177236309e698da5fcd0879ce8fd37457d7c266d16b550bcbbd18"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c35e09e9899b72a76e762f9854e8750213f67567787d45f37ce06daf57ca78"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1257f4394be0d3b00de8c9e840ca5601d0a4a8438361ce9c2b05c7d25f6057b"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02272fe48280e0293a04245ca5d919b2c94a48b408b55e858feae9618138aeda"}, + {file = 
"tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dc3ad9ebc76eabe8b1d7c04d38be884b8f9d60c0cdc09b0aa4e3bcf746de0388"}, + {file = "tokenizers-0.15.2-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:32e16bdeffa7c4f46bf2152172ca511808b952701d13e7c18833c0b73cb5c23f"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fb16ba563d59003028b678d2361a27f7e4ae0ab29c7a80690efa20d829c81fdb"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:2277c36d2d6cdb7876c274547921a42425b6810d38354327dd65a8009acf870c"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cf75d32e8d250781940d07f7eece253f2fe9ecdb1dc7ba6e3833fa17b82fcbc"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b3b31884dc8e9b21508bb76da80ebf7308fdb947a17affce815665d5c4d028"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b10122d8d8e30afb43bb1fe21a3619f62c3e2574bff2699cf8af8b0b6c5dc4a3"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d88b96ff0fe8e91f6ef01ba50b0d71db5017fa4e3b1d99681cec89a85faf7bf7"}, + {file = "tokenizers-0.15.2-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:37aaec5a52e959892870a7c47cef80c53797c0db9149d458460f4f31e2fb250e"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e2ea752f2b0fe96eb6e2f3adbbf4d72aaa1272079b0dfa1145507bd6a5d537e6"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4b19a808d8799fda23504a5cd31d2f58e6f52f140380082b352f877017d6342b"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:64c86e5e068ac8b19204419ed8ca90f9d25db20578f5881e337d203b314f4104"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de19c4dc503c612847edf833c82e9f73cd79926a384af9d801dcf93f110cea4e"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea09acd2fe3324174063d61ad620dec3bcf042b495515f27f638270a7d466e8b"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cf27fd43472e07b57cf420eee1e814549203d56de00b5af8659cb99885472f1f"}, + {file = "tokenizers-0.15.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7ca22bd897537a0080521445d91a58886c8c04084a6a19e6c78c586e0cfa92a5"}, + {file = "tokenizers-0.15.2.tar.gz", hash = "sha256:e6e9c6e019dd5484be5beafc775ae6c925f4c69a3487040ed09b45e13df2cb91"}, +] + +[package.dependencies] +huggingface_hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] [[package]] @@ -8977,6 +9107,20 @@ files = [ {file = "win_precise_time-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f510fa92d9c39ea533c983e1d62c7bc66fdf0a3e3c3bdda48d4ebb634ff7034"}, ] +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = true +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = 
"sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + [[package]] name = "wrapt" version = "1.15.0" @@ -9282,4 +9426,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "9644e603fdf7b7ca6d177247950370b86ba1c84849deb7cfd83510086cb2e193" +content-hash = "4ca3a8cc02b7064f5650cc7f77d4e374ab057da8b40687a4c7c8df19c705b7a9" diff --git a/pyproject.toml b/pyproject.toml index 36ee683de9..e273511d8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,9 +73,9 @@ pipdeptree = {version = ">=2.9.0,<2.10", optional = true} pyathena = {version = ">=2.9.6", optional = true} weaviate-client = {version = ">=3.22", optional = true} adlfs = {version = ">=2022.4.0", optional = true} -pyodbc = {version = "^4.0.39", optional = true} -qdrant-client = {version = "^1.6.4", optional = true, extras = ["fastembed"]} -databricks-sql-connector = {version = ">=3", optional = true} +pyodbc = {version = ">=4.0.39", optional = true} +qdrant-client = {version = ">=1.8", optional = true, extras = ["fastembed"]} +databricks-sql-connector = {version = ">=2.9.3", optional = true} clickhouse-driver = { version = ">=0.2.7", optional = true } clickhouse-connect = { version = ">=0.7.7", optional = true } deltalake = { version = ">=0.17.4", optional = true } diff --git a/tests/load/qdrant/test_pipeline.py b/tests/load/qdrant/test_pipeline.py index d50b50282a..b8101053fc 100644 --- a/tests/load/qdrant/test_pipeline.py +++ b/tests/load/qdrant/test_pipeline.py @@ -68,6 +68,8 @@ def some_data(): assert schema state = client.get_stored_state("test_pipeline_append") assert state + state = client.get_stored_state("unknown_pipeline") + assert state is None def test_pipeline_append() -> None: diff --git a/tests/load/qdrant/utils.py b/tests/load/qdrant/utils.py index 74d5db9715..3b12d15f86 100644 --- a/tests/load/qdrant/utils.py +++ b/tests/load/qdrant/utils.py @@ -20,16 +20,16 @@ def assert_collection( expected_items_count: int = None, items: List[Any] = None, ) -> None: - client: QdrantClient = pipeline.destination_client() # type: ignore[assignment] + client: QdrantClient + with pipeline.destination_client() as client: # type: ignore[assignment] + # Check if collection exists + exists = client._collection_exists(collection_name) + assert exists - # Check if collection exists - exists = client._collection_exists(collection_name) - assert exists - - qualified_collection_name = client._make_qualified_collection_name(collection_name) - point_records, offset = client.db_client.scroll( - qualified_collection_name, with_payload=True, limit=50 - ) + qualified_collection_name = client._make_qualified_collection_name(collection_name) + point_records, offset = client.db_client.scroll( + qualified_collection_name, with_payload=True, limit=50 + ) if expected_items_count is not None: assert expected_items_count == len(point_records) @@ -55,10 +55,11 @@ def has_collections(client): if Container()[PipelineContext].is_active(): # take existing pipeline p = dlt.pipeline() - client: QdrantClient = p.destination_client() # type: ignore[assignment] + client: QdrantClient - if has_collections(client): - client.drop_storage() + with p.destination_client() as client: # type: ignore[assignment] + if has_collections(client): + client.drop_storage() p._wipe_working_folder() # deactivate context From 71e418bb3fdbaa1a06f212e7fda4e7e9777a4e22 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 
00:17:11 +0200 Subject: [PATCH 055/105] makes weaviate running --- dlt/common/normalizers/naming/sql_cs_v1.py | 20 ++++ .../impl/weaviate/weaviate_client.py | 17 +-- dlt/destinations/utils.py | 11 +- .../dlt-ecosystem/destinations/weaviate.md | 2 +- .../common/cases/normalizers}/sql_upper.py | 0 .../schema/test_normalize_identifiers.py | 38 ++----- tests/load/pipeline/test_duckdb.py | 6 +- tests/load/pipeline/test_restore_state.py | 39 +++++-- tests/load/weaviate/test_weaviate_client.py | 10 +- tests/load/weaviate/utils.py | 104 +++++++++--------- 10 files changed, 146 insertions(+), 101 deletions(-) create mode 100644 dlt/common/normalizers/naming/sql_cs_v1.py rename {dlt/common/normalizers/naming => tests/common/cases/normalizers}/sql_upper.py (100%) diff --git a/dlt/common/normalizers/naming/sql_cs_v1.py b/dlt/common/normalizers/naming/sql_cs_v1.py new file mode 100644 index 0000000000..f83e4259a5 --- /dev/null +++ b/dlt/common/normalizers/naming/sql_cs_v1.py @@ -0,0 +1,20 @@ +from typing import Any, Sequence + +from dlt.common.normalizers.naming.naming import NamingConvention as BaseNamingConvention + + +class NamingConvention(BaseNamingConvention): + PATH_SEPARATOR = "__" + + _CLEANUP_TABLE = str.maketrans(".\n\r'\"▶", "______") + + def normalize_identifier(self, identifier: str) -> str: + identifier = super().normalize_identifier(identifier) + norm_identifier = identifier.translate(self._CLEANUP_TABLE) + return self.shorten_identifier(norm_identifier, identifier, self.max_length) + + def make_path(self, *identifiers: Any) -> str: + return self.PATH_SEPARATOR.join(filter(lambda x: x.strip(), identifiers)) + + def break_path(self, path: str) -> Sequence[str]: + return [ident for ident in path.split(self.PATH_SEPARATOR) if ident.strip()] diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 64c9de9607..8d51c62af3 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -35,12 +35,10 @@ get_columns_names_with_prop, loads_table, normalize_table_identifiers, - pipeline_state_table, version_table, ) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync -from dlt.common.data_types import TDataType from dlt.common.storages import FileStorage from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT @@ -49,6 +47,7 @@ from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict, WeaviateGrpcError from dlt.destinations.type_mapping import TypeMapper +from dlt.destinations.utils import get_pipeline_state_query_columns NON_VECTORIZED_CLASS = { @@ -251,11 +250,13 @@ def __init__( self.version_collection_properties = list(version_table_["columns"].keys()) loads_table_ = normalize_table_identifiers(loads_table(), schema.naming) self.loads_collection_properties = list(loads_table_["columns"].keys()) - state_table_ = normalize_table_identifiers(pipeline_state_table(), schema.naming) + state_table_ = normalize_table_identifiers( + get_pipeline_state_query_columns(), schema.naming + ) self.pipeline_state_properties = list(state_table_["columns"].keys()) self.config: WeaviateClientConfiguration = config - self.db_client = self.create_db_client(config) + self.db_client: weaviate.Client = None self._vectorizer_config = { "vectorizer": 
config.vectorizer, @@ -529,7 +530,7 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: if len(state_records) == 0: return None for state in state_records: - load_id = state["_dlt_load_id"] + load_id = state[p_dlt_load_id] load_records = self.get_records( self.schema.loads_table_name, where={ @@ -543,7 +544,6 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: # if there is a load for this state which was successful, return the state if len(load_records): state["dlt_load_id"] = state.pop(p_dlt_load_id) - state.pop("version_hash") return StateInfo(**state) def get_stored_schema(self) -> Optional[StorageSchemaInfo]: @@ -582,7 +582,6 @@ def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaI return None @wrap_weaviate_error - # @wrap_grpc_error def get_records( self, table_name: str, @@ -697,6 +696,7 @@ def complete_load(self, load_id: str) -> None: self.create_object(properties, self.schema.loads_table_name) def __enter__(self) -> "WeaviateClient": + self.db_client = self.create_db_client(self.config) return self def __exit__( @@ -705,7 +705,8 @@ def __exit__( exc_val: BaseException, exc_tb: TracebackType, ) -> None: - pass + if self.db_client: + self.db_client = None def _update_schema_in_storage(self, schema: Schema) -> None: schema_str = json.dumps(schema.to_dict()) diff --git a/dlt/destinations/utils.py b/dlt/destinations/utils.py index e93feb58de..d24ad7c5a7 100644 --- a/dlt/destinations/utils.py +++ b/dlt/destinations/utils.py @@ -4,11 +4,12 @@ from dlt.common import logger from dlt.common.schema import Schema from dlt.common.schema.exceptions import SchemaCorruptedException -from dlt.common.schema.typing import MERGE_STRATEGIES +from dlt.common.schema.typing import MERGE_STRATEGIES, TTableSchema from dlt.common.schema.utils import ( get_columns_names_with_prop, get_first_column_name_with_prop, has_column_with_prop, + pipeline_state_table, ) from typing import Any, cast, Tuple, Dict, Type @@ -51,6 +52,14 @@ def parse_db_data_type_str_with_precision(db_type: str) -> Tuple[str, Optional[i return db_type, None, None +def get_pipeline_state_query_columns() -> TTableSchema: + """We get definition of pipeline state table without columns we do not need for the query""" + state_table = pipeline_state_table() + # we do not need version_hash to be backward compatible as long as we can + state_table["columns"].pop("version_hash") + return state_table + + def verify_sql_job_client_schema(schema: Schema, warnings: bool = True) -> List[Exception]: log = logger.warning if warnings else logger.info # collect all exceptions to show all problems in the schema diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md index 11d1276ceb..c6597fadce 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md +++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md @@ -252,7 +252,7 @@ it will be normalized to: so your best course of action is to clean up the data yourself before loading and use the default naming convention. 
Nevertheless, you can configure the alternative in `config.toml`: ```toml [schema] -naming="dlt.destinations.weaviate.impl.ci_naming" +naming="dlt.destinations.impl.weaviate.ci_naming" ``` ## Additional destination options diff --git a/dlt/common/normalizers/naming/sql_upper.py b/tests/common/cases/normalizers/sql_upper.py similarity index 100% rename from dlt/common/normalizers/naming/sql_upper.py rename to tests/common/cases/normalizers/sql_upper.py diff --git a/tests/common/schema/test_normalize_identifiers.py b/tests/common/schema/test_normalize_identifiers.py index b84dbc9f1a..2b7599e65e 100644 --- a/tests/common/schema/test_normalize_identifiers.py +++ b/tests/common/schema/test_normalize_identifiers.py @@ -1,36 +1,22 @@ from copy import deepcopy import os -from typing import Callable, List, Sequence, cast +from typing import Callable import pytest -from dlt.common import pendulum, json +from dlt.common import json from dlt.common.configuration import resolve_configuration from dlt.common.configuration.container import Container from dlt.common.normalizers.naming.naming import NamingConvention -from dlt.common.schema.migrations import migrate_schema from dlt.common.storages import SchemaStorageConfiguration from dlt.common.destination.capabilities import DestinationCapabilitiesContext -from dlt.common.exceptions import DictValidationException -from dlt.common.normalizers.naming import snake_case, direct, sql_upper -from dlt.common.typing import DictStrAny, StrAny -from dlt.common.utils import uniq_id -from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils, TColumnHint -from dlt.common.schema.exceptions import ( - InvalidSchemaName, - ParentTableNotFoundException, - SchemaEngineNoUpgradePathException, - TableIdentifiersFrozen, -) -from dlt.common.schema.typing import ( - LOADS_TABLE_NAME, - SIMPLE_REGEX_PREFIX, - VERSION_TABLE_NAME, - TColumnName, - TSimpleRegex, - COLUMN_HINTS, -) +from dlt.common.normalizers.naming import snake_case, direct +from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils +from dlt.common.schema.exceptions import TableIdentifiersFrozen +from dlt.common.schema.typing import SIMPLE_REGEX_PREFIX from dlt.common.storages import SchemaStorage -from tests.common.utils import load_json_case, load_yml_case, COMMON_TEST_CASES_PATH + +from tests.common.cases.normalizers import sql_upper +from tests.common.utils import load_json_case, load_yml_case @pytest.fixture @@ -178,7 +164,7 @@ def test_update_normalizers() -> None: # save default hints in original form default_hints = schema._settings["default_hints"] - os.environ["SCHEMA__NAMING"] = "sql_upper" + os.environ["SCHEMA__NAMING"] = "tests.common.cases.normalizers.sql_upper" schema.update_normalizers() assert isinstance(schema.naming, sql_upper.NamingConvention) # print(schema.to_pretty_yaml()) @@ -188,7 +174,7 @@ def test_update_normalizers() -> None: assert schema.tables["ISSUES"]["resource"] == "issues" # make sure normalizer config is replaced - assert schema._normalizers_config["names"] == "sql_upper" + assert schema._normalizers_config["names"] == "tests.common.cases.normalizers.sql_upper" assert "allow_identifier_change_on_table_with_data" not in schema._normalizers_config # regexes are uppercased @@ -273,7 +259,7 @@ def test_raise_on_change_identifier_table_with_data() -> None: # mark issues table to seen data and change naming to sql upper issues_table = schema.tables["issues"] issues_table["x-normalizer"] = {"seen-data": True} - os.environ["SCHEMA__NAMING"] = "sql_upper" 
+ os.environ["SCHEMA__NAMING"] = "tests.common.cases.normalizers.sql_upper" with pytest.raises(TableIdentifiersFrozen) as fr_ex: schema.update_normalizers() assert fr_ex.value.table_name == "issues" diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index 3f9821cee0..a32ec82aa4 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ -1,6 +1,7 @@ import pytest import os +from dlt.common.schema.exceptions import SchemaIdentifierNormalizationClash from dlt.common.time import ensure_pendulum_datetime from dlt.destinations.exceptions import DatabaseTerminalException from dlt.pipeline.exceptions import PipelineStepFailed @@ -54,7 +55,10 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No table_name="🦚peacocks🦚", loader_file_format=destination_config.file_format, ) - assert isinstance(pip_ex.value.__context__, DatabaseTerminalException) + assert isinstance(pip_ex.value.__context__, SchemaIdentifierNormalizationClash) + assert pip_ex.value.__context__.conflict_identifier_name == "🦚Peacocks🦚" + assert pip_ex.value.__context__.identifier_name == "🦚peacocks🦚" + assert pip_ex.value.__context__.identifier_type == "table" # show tables and columns with pipeline.sql_client() as client: diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 6b04285c94..14ae7fa814 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -77,7 +77,7 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - initial_state["_local"]["_last_extracted_at"] = pendulum.now() initial_state["_local"]["_last_extracted_hash"] = initial_state["_version_hash"] # add _dlt_id and _dlt_load_id - resource, _ = state_resource(initial_state) + resource, _ = state_resource(initial_state, "not_used_load_id") resource.apply_hints( columns={ "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}, @@ -195,15 +195,19 @@ def test_silently_skip_on_invalid_credentials( ) @pytest.mark.parametrize("use_single_dataset", [True, False]) @pytest.mark.parametrize( - "naming_convention", ["tests.common.cases.normalizers.title_case", "snake_case"] + "naming_convention", + [ + "tests.common.cases.normalizers.title_case", + "snake_case", + "tests.common.cases.normalizers.sql_upper", + ], ) def test_get_schemas_from_destination( destination_config: DestinationTestConfiguration, use_single_dataset: bool, naming_convention: str, ) -> None: - # use specific naming convention - os.environ["SCHEMA__NAMING"] = naming_convention + set_naming_env(destination_config.destination, naming_convention) pipeline_name = "pipe_" + uniq_id() dataset_name = "state_test_" + uniq_id() @@ -288,13 +292,17 @@ def _make_dn_name(schema_name: str) -> str: ids=lambda x: x.name, ) @pytest.mark.parametrize( - "naming_convention", ["tests.common.cases.normalizers.title_case", "snake_case", "sql_upper"] + "naming_convention", + [ + "tests.common.cases.normalizers.title_case", + "snake_case", + "tests.common.cases.normalizers.sql_upper", + ], ) def test_restore_state_pipeline( destination_config: DestinationTestConfiguration, naming_convention: str ) -> None: - # use specific naming convention - os.environ["SCHEMA__NAMING"] = naming_convention + set_naming_env(destination_config.destination, naming_convention) # enable restoring from destination os.environ["RESTORE_FROM_DESTINATION"] = "True" pipeline_name = "pipe_" + uniq_id() @@ -471,6 +479,9 @@ def 
test_restore_schemas_while_import_schemas_exist( # make sure schema got imported schema = p.schemas["ethereum"] assert "blocks" in schema.tables + # allow to modify tables even if naming convention is changed. some of the tables in ethereum schema + # have processing hints that lock the table schema. so when weaviate changes naming convention we have an exception + os.environ["SCHEMA__ALLOW_IDENTIFIER_CHANGE_ON_TABLE_WITH_DATA"] = "true" # extract some additional data to upgrade schema in the pipeline p.run( @@ -516,7 +527,7 @@ def test_restore_schemas_while_import_schemas_exist( assert normalized_annotations in schema.tables # check if attached to import schema - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V9() # extract some data with restored pipeline p.run( ["C", "D", "E"], table_name="blacklist", loader_file_format=destination_config.file_format @@ -729,3 +740,15 @@ def prepare_import_folder(p: Pipeline) -> None: common_yml_case_path("schemas/eth/ethereum_schema_v5"), os.path.join(p._schema_storage.config.import_schema_path, "ethereum.schema.yaml"), ) + + +def set_naming_env(destination: str, naming_convention: str) -> None: + # snake case is for default convention so do not set it + if naming_convention != "snake_case": + # path convention to test weaviate ci_naming + if destination == "weaviate": + if naming_convention.endswith("sql_upper"): + pytest.skip(f"{naming_convention} not supported on weaviate") + else: + naming_convention = "dlt.destinations.impl.weaviate.ci_naming" + os.environ["SCHEMA__NAMING"] = naming_convention diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 730c2675f3..17c1a9828c 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -59,11 +59,11 @@ def make_client(naming_convention: str) -> Iterator[WeaviateClient]: "test_schema", {"names": f"dlt.destinations.impl.weaviate.{naming_convention}", "json": None}, ) - _client = get_client_instance(schema) - try: - yield _client - finally: - _client.drop_storage() + with get_client_instance(schema) as _client: + try: + yield _client + finally: + _client.drop_storage() @pytest.fixture diff --git a/tests/load/weaviate/utils.py b/tests/load/weaviate/utils.py index 1b2a74fcb8..650666c8ab 100644 --- a/tests/load/weaviate/utils.py +++ b/tests/load/weaviate/utils.py @@ -22,53 +22,56 @@ def assert_class( expected_items_count: int = None, items: List[Any] = None, ) -> None: - client: WeaviateClient = pipeline.destination_client() # type: ignore[assignment] - vectorizer_name: str = client._vectorizer_config["vectorizer"] # type: ignore[assignment] - - # Check if class exists - schema = client.get_class_schema(class_name) - assert schema is not None - - columns = pipeline.default_schema.get_table_columns(class_name) - - properties = {prop["name"]: prop for prop in schema["properties"]} - assert set(properties.keys()) == set(columns.keys()) - - # make sure expected columns are vectorized - for column_name, column in columns.items(): - prop = properties[column_name] - assert prop["moduleConfig"][vectorizer_name]["skip"] == ( - not column.get(VECTORIZE_HINT, False) - ) - # tokenization - if TOKENIZATION_HINT in column: - assert prop["tokenization"] == column[TOKENIZATION_HINT] # type: ignore[literal-required] - - # if there's a single vectorize hint, class must have vectorizer enabled - if 
get_columns_names_with_prop(pipeline.default_schema.get_table(class_name), VECTORIZE_HINT): - assert schema["vectorizer"] == vectorizer_name - else: - assert schema["vectorizer"] == "none" - - # response = db_client.query.get(class_name, list(properties.keys())).do() - response = client.query_class(class_name, list(properties.keys())).do() - objects = response["data"]["Get"][client.make_qualified_class_name(class_name)] - - if expected_items_count is not None: - assert expected_items_count == len(objects) - - if items is None: - return - - # TODO: Remove this once we have a better way comparing the data - drop_keys = ["_dlt_id", "_dlt_load_id"] - objects_without_dlt_keys = [ - {k: v for k, v in obj.items() if k not in drop_keys} for obj in objects - ] - - # pytest compares content wise but ignores order of elements of dict - # assert sorted(objects_without_dlt_keys, key=lambda d: d['doc_id']) == sorted(data, key=lambda d: d['doc_id']) - assert_unordered_list_equal(objects_without_dlt_keys, items) + client: WeaviateClient + with pipeline.destination_client() as client: # type: ignore[assignment] + vectorizer_name: str = client._vectorizer_config["vectorizer"] # type: ignore[assignment] + + # Check if class exists + schema = client.get_class_schema(class_name) + assert schema is not None + + columns = pipeline.default_schema.get_table_columns(class_name) + + properties = {prop["name"]: prop for prop in schema["properties"]} + assert set(properties.keys()) == set(columns.keys()) + + # make sure expected columns are vectorized + for column_name, column in columns.items(): + prop = properties[column_name] + assert prop["moduleConfig"][vectorizer_name]["skip"] == ( + not column.get(VECTORIZE_HINT, False) + ) + # tokenization + if TOKENIZATION_HINT in column: + assert prop["tokenization"] == column[TOKENIZATION_HINT] # type: ignore[literal-required] + + # if there's a single vectorize hint, class must have vectorizer enabled + if get_columns_names_with_prop( + pipeline.default_schema.get_table(class_name), VECTORIZE_HINT + ): + assert schema["vectorizer"] == vectorizer_name + else: + assert schema["vectorizer"] == "none" + + # response = db_client.query.get(class_name, list(properties.keys())).do() + response = client.query_class(class_name, list(properties.keys())).do() + objects = response["data"]["Get"][client.make_qualified_class_name(class_name)] + + if expected_items_count is not None: + assert expected_items_count == len(objects) + + if items is None: + return + + # TODO: Remove this once we have a better way comparing the data + drop_keys = ["_dlt_id", "_dlt_load_id"] + objects_without_dlt_keys = [ + {k: v for k, v in obj.items() if k not in drop_keys} for obj in objects + ] + + # pytest compares content wise but ignores order of elements of dict + # assert sorted(objects_without_dlt_keys, key=lambda d: d['doc_id']) == sorted(data, key=lambda d: d['doc_id']) + assert_unordered_list_equal(objects_without_dlt_keys, items) def delete_classes(p, class_list): @@ -87,10 +90,9 @@ def schema_has_classes(client): if Container()[PipelineContext].is_active(): # take existing pipeline p = dlt.pipeline() - client = p.destination_client() - - if schema_has_classes(client): - client.drop_storage() + with p.destination_client() as client: + if schema_has_classes(client): + client.drop_storage() p._wipe_working_folder() # deactivate context From ce414e19d8ea00cb558e15bf1a8a0c3d877d9859 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 23:38:58 +0200 Subject: [PATCH 056/105] uses schemata 
to find databases on athena --- dlt/destinations/impl/athena/athena.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dlt/destinations/impl/athena/athena.py b/dlt/destinations/impl/athena/athena.py index 1f11a27521..8d0ffb1d0c 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -362,12 +362,6 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB yield DBApiCursorImpl(cursor) # type: ignore - def has_dataset(self) -> bool: - # PRESTO escaping for queries - query = f"""SHOW DATABASES LIKE {self.fully_qualified_dataset_name()};""" - rows = self.execute_sql(query) - return len(rows) > 0 - class AthenaClient(SqlJobClientWithStaging, SupportsStagingDestination): def __init__( From bde61a9fcaa58c96270c49a91e61be743043d4ae Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 23:39:57 +0200 Subject: [PATCH 057/105] uses api get_table for hidden dataset on bigquery to reflect schemas, support case insensitive datasets --- dlt/destinations/impl/bigquery/__init__.py | 6 ++-- dlt/destinations/impl/bigquery/bigquery.py | 34 ++++++++++++++++++- .../impl/bigquery/configuration.py | 1 + dlt/destinations/impl/bigquery/factory.py | 25 ++++++++++++++ dlt/destinations/impl/bigquery/sql_client.py | 13 ++++++- tests/load/test_job_client.py | 23 ++++++++----- 6 files changed, 89 insertions(+), 13 deletions(-) diff --git a/dlt/destinations/impl/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py index 7a4c2bb637..120d487757 100644 --- a/dlt/destinations/impl/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -9,11 +9,11 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supported_loader_file_formats = ["jsonl", "parquet"] caps.preferred_staging_file_format = "parquet" caps.supported_staging_file_formats = ["parquet", "jsonl"] + # BigQuery is by default case sensitive but that cannot be turned off for a dataset + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity caps.escape_identifier = escape_hive_identifier caps.escape_literal = None - caps.has_case_sensitive_identifiers = ( - True # there are case insensitive identifiers but dlt does not use them - ) + caps.has_case_sensitive_identifiers = True caps.casefold_identifier = str # BQ limit is 4GB but leave a large headroom since buffered writer does not preemptively check size caps.recommended_file_size = int(1024 * 1024 * 1024) diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index 3cdfc793a6..8d870487f5 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -1,10 +1,11 @@ import functools import os from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, cast +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, cast import google.cloud.bigquery as bigquery # noqa: I250 from google.api_core import exceptions as api_core_exceptions +from google.cloud import exceptions as gcp_exceptions from google.api_core import retry from google.cloud.bigquery.retry import _RETRYABLE_REASONS @@ -370,6 +371,37 @@ def prepare_load_table( ) return table + def get_storage_tables( + self, table_names: Iterable[str] + ) -> Iterable[Tuple[str, TTableSchemaColumns]]: + print(self.sql_client.fully_qualified_dataset_name()) + """Gets table schemas from BigQuery using INFORMATION_SCHEMA or get_table for hidden datasets""" + if not 
self.sql_client.is_hidden_dataset: + return super().get_storage_tables(table_names) + + # use the api to get storage tables for hidden dataset + schema_tables: List[Tuple[str, TTableSchemaColumns]] = [] + for table_name in table_names: + try: + schema_table: TTableSchemaColumns = {} + table = self.sql_client.native_connection.get_table( + self.sql_client.make_qualified_table_name(table_name, escape=False), + retry=self.sql_client._default_retry, + timeout=self.config.http_timeout, + ) + for c in table.schema: + schema_c: TColumnSchema = { + "name": c.name, + "nullable": c.is_nullable, + **self._from_db_type(c.field_type, c.precision, c.scale), + } + schema_table[c.name] = schema_c + schema_tables.append((table_name, schema_table)) + except gcp_exceptions.NotFound: + # table is not present + schema_tables.append((table_name, {})) + return schema_tables + def _get_info_schema_columns_query( self, catalog_name: Optional[str], schema_name: str, folded_table_names: List[str] ) -> Tuple[str, List[Any]]: diff --git a/dlt/destinations/impl/bigquery/configuration.py b/dlt/destinations/impl/bigquery/configuration.py index f69e85ca3d..0e2403f7d9 100644 --- a/dlt/destinations/impl/bigquery/configuration.py +++ b/dlt/destinations/impl/bigquery/configuration.py @@ -14,6 +14,7 @@ class BigQueryClientConfiguration(DestinationClientDwhWithStagingConfiguration): destination_type: Final[str] = dataclasses.field(default="bigquery", init=False, repr=False, compare=False) # type: ignore credentials: GcpServiceAccountCredentials = None location: str = "US" + has_case_sensitive_identifiers: bool = True http_timeout: float = 15.0 # connection timeout for http request to BigQuery api file_upload_timeout: float = 30 * 60.0 # a timeout for file upload when loading local files diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py index bee55fa164..8d4da4cfc8 100644 --- a/dlt/destinations/impl/bigquery/factory.py +++ b/dlt/destinations/impl/bigquery/factory.py @@ -1,5 +1,6 @@ import typing as t +from dlt.common.normalizers.naming import NamingConvention from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.common.configuration.specs import GcpServiceAccountCredentials from dlt.destinations.impl.bigquery import capabilities @@ -26,14 +27,38 @@ def __init__( self, credentials: t.Optional[GcpServiceAccountCredentials] = None, location: t.Optional[str] = None, + has_case_sensitive_identifiers: bool = None, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, ) -> None: + """Configure the MsSql destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the mssql database. Can be an instance of `GcpServiceAccountCredentials` or + a dict or string with service accounts credentials as used in the Google Cloud + location: A location where the datasets will be created, eg. "EU". 
The default is "US" + has_case_sensitive_identifiers: Is the dataset case-sensitive, defaults to True + **kwargs: Additional arguments passed to the destination config + """ super().__init__( credentials=credentials, location=location, + has_case_sensitive_identifiers=has_case_sensitive_identifiers, destination_name=destination_name, environment=environment, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: BigQueryClientConfiguration, + naming: NamingConvention, + ) -> DestinationCapabilitiesContext: + # modify the caps if case sensitive identifiers are requested + caps.has_case_sensitive_identifiers = config.has_case_sensitive_identifiers + return super().adjust_capabilities(caps, config, naming) diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 3f79081988..c178ebf419 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -191,8 +191,11 @@ def has_dataset(self) -> bool: return False def create_dataset(self) -> None: + dataset = bigquery.Dataset(self.fully_qualified_dataset_name(escape=False)) + dataset.location = self.location + dataset.is_case_insensitive = not self.capabilities.has_case_sensitive_identifiers self._client.create_dataset( - self.fully_qualified_dataset_name(escape=False), + dataset, retry=self._default_retry, timeout=self.http_timeout, ) @@ -241,6 +244,14 @@ def catalog_name(self, escape: bool = True) -> Optional[str]: project_id = self.capabilities.escape_identifier(project_id) return project_id + @property + def is_hidden_dataset(self) -> bool: + """Tells if the dataset associated with sql_client is a hidden dataset. + + Hidden datasets are not present in information schema. 
+ """ + return self.dataset_name.startswith("_") + @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: if not cls.is_dbapi_exception(ex): diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index f27cb52788..999cea8553 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -197,11 +197,11 @@ def test_complete_load(client: SqlJobClientBase) -> None: @pytest.mark.parametrize( "client", - destinations_configs(default_sql_configs=True, subset=["redshift", "postgres", "duckdb"]), + destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name, ) -def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None: +def test_schema_update_create_table(client: SqlJobClientBase) -> None: # infer typical rasa event schema schema = client.schema table_name = "event_test_table" + uniq_id() @@ -232,7 +232,15 @@ def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None: indirect=True, ids=lambda x: x.name, ) -def test_schema_update_create_table_bigquery(client: SqlJobClientBase) -> None: +@pytest.mark.parametrize("dataset_name", (None, "_hidden_ds")) +def test_schema_update_create_table_bigquery(client: SqlJobClientBase, dataset_name: str) -> None: + # patch dataset name + if dataset_name: + # drop existing dataset + client.drop_storage() + client.sql_client.dataset_name = dataset_name + "_" + uniq_id() + client.initialize_storage() + # infer typical rasa event schema schema = client.schema # this will be partition @@ -249,11 +257,10 @@ def test_schema_update_create_table_bigquery(client: SqlJobClientBase) -> None: assert table_update["timestamp"]["partition"] is True assert table_update["_dlt_id"]["nullable"] is False _, storage_columns = client.get_storage_table("event_test_table") - assert storage_columns["timestamp"]["partition"] is True - assert storage_columns["sender_id"]["cluster"] is True + # check if all columns present + assert storage_columns.keys() == client.schema.tables["event_test_table"]["columns"].keys() _, storage_columns = client.get_storage_table("_dlt_version") - assert storage_columns["version"]["partition"] is False - assert storage_columns["version"]["cluster"] is False + assert storage_columns.keys() == client.schema.tables["_dlt_version"]["columns"].keys() @pytest.mark.parametrize( @@ -434,7 +441,7 @@ def _assert_columns_order(sql_: str) -> None: if hasattr(client.sql_client, "escape_ddl_identifier"): col_name = client.sql_client.escape_ddl_identifier(c["name"]) else: - col_name = client.capabilities.escape_identifier(c["name"]) + col_name = client.sql_client.escape_column_name(c["name"]) print(col_name) # find column names idx = sql_.find(col_name, idx) From 036e3ddf3875916748edd65ec78ec021b9f94729 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 23:40:32 +0200 Subject: [PATCH 058/105] adds naming conventions to two restore state tests --- tests/common/storages/test_schema_storage.py | 22 +++++--------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py index 091e876708..383fa5d1ed 100644 --- a/tests/common/storages/test_schema_storage.py +++ b/tests/common/storages/test_schema_storage.py @@ -1,13 +1,10 @@ import os -import shutil import pytest import yaml from dlt.common import json from dlt.common.normalizers.utils import explicit_normalizers from dlt.common.schema.schema import Schema -from dlt.common.schema.typing 
import TStoredSchema -from dlt.common.schema.utils import remove_processing_hints from dlt.common.storages.exceptions import ( InStorageSchemaModified, SchemaNotFoundError, @@ -21,9 +18,9 @@ ) from tests.utils import autouse_test_storage, TEST_STORAGE_ROOT +from tests.common.storages.utils import prepare_eth_import_folder from tests.common.utils import ( load_yml_case, - yml_case_path, COMMON_TEST_CASES_PATH, IMPORTED_VERSION_HASH_ETH_V9, ) @@ -237,7 +234,7 @@ def test_getter(storage: SchemaStorage) -> None: def test_getter_with_import(ie_storage: SchemaStorage) -> None: with pytest.raises(KeyError): ie_storage["ethereum"] - prepare_import_folder(ie_storage) + prepare_eth_import_folder(ie_storage) # schema will be imported schema = ie_storage["ethereum"] assert schema.name == "ethereum" @@ -263,7 +260,7 @@ def test_getter_with_import(ie_storage: SchemaStorage) -> None: def test_save_store_schema_over_import(ie_storage: SchemaStorage) -> None: - prepare_import_folder(ie_storage) + prepare_eth_import_folder(ie_storage) # we have ethereum schema to be imported but we create new schema and save it schema = Schema("ethereum") schema_hash = schema.version_hash @@ -286,7 +283,7 @@ def test_save_store_schema_over_import(ie_storage: SchemaStorage) -> None: def test_save_store_schema_over_import_sync(synced_storage: SchemaStorage) -> None: # as in test_save_store_schema_over_import but we export the new schema immediately to overwrite the imported schema - prepare_import_folder(synced_storage) + prepare_eth_import_folder(synced_storage) schema = Schema("ethereum") schema_hash = schema.version_hash synced_storage.save_schema(schema) @@ -499,17 +496,8 @@ def test_new_live_schema_committed(live_storage: LiveSchemaStorage) -> None: # assert schema.settings["schema_sealed"] is True -def prepare_import_folder(storage: SchemaStorage) -> Schema: - eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") - # remove processing hints before installing as import schema - # ethereum schema is a "dirty" schema with processing hints - eth = Schema.from_dict(eth_V9, remove_processing_hints=True) - storage._export_schema(eth, os.path.join(storage.storage.storage_path, "../import/")) - return eth - - def assert_schema_imported(synced_storage: SchemaStorage, storage: SchemaStorage) -> Schema: - prepare_import_folder(synced_storage) + prepare_eth_import_folder(synced_storage) schema = synced_storage.load_schema("ethereum") # is linked to imported schema schema._imported_version_hash = IMPORTED_VERSION_HASH_ETH_V9() From 8546763005a89cad62f63cea2ff1f2ee138d415e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 13 Jun 2024 23:41:00 +0200 Subject: [PATCH 059/105] fixes escape identifiers to column escape --- .../impl/clickhouse/clickhouse.py | 4 +- dlt/destinations/impl/dremio/dremio.py | 6 +- dlt/destinations/impl/mssql/mssql.py | 2 +- dlt/destinations/impl/redshift/redshift.py | 6 +- dlt/destinations/sql_jobs.py | 2 +- dlt/pipeline/pipeline.py | 4 +- tests/common/storages/utils.py | 12 ++++ tests/load/pipeline/test_pipelines.py | 35 ++++++++++-- tests/load/pipeline/test_restore_state.py | 31 ++++++++--- tests/load/test_insert_job_client.py | 55 +++++++++---------- tests/pipeline/test_dlt_versions.py | 1 + tests/pipeline/utils.py | 2 +- 12 files changed, 104 insertions(+), 56 deletions(-) diff --git a/dlt/destinations/impl/clickhouse/clickhouse.py b/dlt/destinations/impl/clickhouse/clickhouse.py index c556fab08e..3696c5036c 100644 --- a/dlt/destinations/impl/clickhouse/clickhouse.py +++ 
b/dlt/destinations/impl/clickhouse/clickhouse.py @@ -326,7 +326,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non ) return ( - f"{self.capabilities.escape_identifier(c['name'])} {type_with_nullability_modifier} {hints_str}" + f"{self.sql_client.escape_column_name(c['name'])} {type_with_nullability_modifier} {hints_str}" .strip() ) @@ -356,7 +356,7 @@ def _get_table_update_sql( sql[0] = f"{sql[0]}\nENGINE = {TABLE_ENGINE_TYPE_TO_CLICKHOUSE_ATTR.get(table_type)}" if primary_key_list := [ - self.capabilities.escape_identifier(c["name"]) + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("primary_key") ]: diff --git a/dlt/destinations/impl/dremio/dremio.py b/dlt/destinations/impl/dremio/dremio.py index 1552bd5b3e..c35ef619ed 100644 --- a/dlt/destinations/impl/dremio/dremio.py +++ b/dlt/destinations/impl/dremio/dremio.py @@ -177,7 +177,7 @@ def _get_table_update_sql( if not generate_alter: partition_list = [ - self.capabilities.escape_identifier(c["name"]) + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("partition") ] @@ -185,7 +185,7 @@ def _get_table_update_sql( sql[0] += "\nPARTITION BY (" + ",".join(partition_list) + ")" sort_list = [ - self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("sort") + self.sql_client.escape_column_name(c["name"]) for c in new_columns if c.get("sort") ] if sort_list: sql[0] += "\nLOCALSORT BY (" + ",".join(sort_list) + ")" @@ -198,7 +198,7 @@ def _from_db_type( return self.type_mapper.from_db_type(bq_t, precision, scale) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - name = self.capabilities.escape_identifier(c["name"]) + name = self.sql_client.escape_column_name(c["name"]) return ( f"{name} {self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" ) diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index c651c3eea0..555a3193a7 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -185,7 +185,7 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non for h in self.active_hints.keys() if c.get(h, False) is True ) - column_name = self.capabilities.escape_identifier(c["name"]) + column_name = self.sql_client.escape_column_name(c["name"]) return f"{column_name} {db_type} {hints_str} {self._gen_not_null(c.get('nullable', True))}" def _create_replace_followup_jobs( diff --git a/dlt/destinations/impl/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py index a753a22166..faa037078a 100644 --- a/dlt/destinations/impl/redshift/redshift.py +++ b/dlt/destinations/impl/redshift/redshift.py @@ -30,9 +30,7 @@ from dlt.destinations.sql_jobs import SqlMergeJob from dlt.destinations.exceptions import DatabaseTerminalException, LoadJobTerminalException from dlt.destinations.job_client_impl import CopyRemoteFileLoadJob, LoadJob -from dlt.destinations.impl.postgres.configuration import PostgresCredentials from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient -from dlt.destinations.impl.redshift import capabilities from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_client import SqlClientBase @@ -148,7 +146,6 @@ def execute(self, table: TTableSchema, bucket_path: str) -> None: "CREDENTIALS" f" 
'aws_access_key_id={aws_access_key};aws_secret_access_key={aws_secret_key}'" ) - table_name = table["name"] # get format ext = os.path.splitext(bucket_path)[1][1:] @@ -188,10 +185,9 @@ def execute(self, table: TTableSchema, bucket_path: str) -> None: raise ValueError(f"Unsupported file type {ext} for Redshift.") with self._sql_client.begin_transaction(): - dataset_name = self._sql_client.dataset_name # TODO: if we ever support csv here remember to add column names to COPY self._sql_client.execute_sql(f""" - COPY {dataset_name}.{table_name} + COPY {self._sql_client.make_qualified_table_name(table['name'])} FROM '{bucket_path}' {file_type} {dateformat} diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index c5b1c72df2..b9539fe114 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -117,7 +117,7 @@ def _generate_insert_sql( table_name = sql_client.make_qualified_table_name(table["name"]) columns = ", ".join( map( - sql_client.capabilities.escape_identifier, + sql_client.escape_column_name, get_columns_names_with_prop(table, "name"), ) ) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index fd52ffb359..48f37f1be3 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -1612,7 +1612,9 @@ def _bump_version_and_extract_state( schema, reuse_exiting_package=True ) data, doc = state_resource(state, load_id) - extract_.original_data = data + # keep the original data to be used in the metrics + if extract_.original_data is None: + extract_.original_data = data # append pipeline state to package state load_package_state_update = load_package_state_update or {} load_package_state_update["pipeline_state"] = doc diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py index 3bfc3374a4..1b5a68948b 100644 --- a/tests/common/storages/utils.py +++ b/tests/common/storages/utils.py @@ -21,9 +21,12 @@ ) from dlt.common.storages import DataItemStorage, FileStorage from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict +from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.typing import StrAny, TDataItems from dlt.common.utils import uniq_id +from tests.common.utils import load_yml_case + TEST_SAMPLE_FILES = "tests/common/storages/samples" MINIMALLY_EXPECTED_RELATIVE_PATHS = { "csv/freshman_kgs.csv", @@ -199,3 +202,12 @@ def assert_package_info( # get dict package_info.asdict() return package_info + + +def prepare_eth_import_folder(storage: SchemaStorage) -> Schema: + eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + # remove processing hints before installing as import schema + # ethereum schema is a "dirty" schema with processing hints + eth = Schema.from_dict(eth_V9, remove_processing_hints=True) + storage._export_schema(eth, storage.config.import_schema_path) + return eth diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index 9e89ab8fdc..8c4b8cec29 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -13,7 +13,8 @@ from dlt.common.destination.reference import WithStagingDataset from dlt.common.schema.exceptions import CannotCoerceColumnException from dlt.common.schema.schema import Schema -from dlt.common.schema.typing import VERSION_TABLE_NAME +from dlt.common.schema.typing import PIPELINE_STATE_TABLE_NAME, VERSION_TABLE_NAME +from dlt.common.schema.utils import pipeline_state_table from dlt.common.typing import TDataItem from dlt.common.utils import uniq_id @@ -137,10 
+138,27 @@ def data_fun() -> Iterator[Any]: destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name, ) -def test_default_schema_name(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize("use_single_dataset", [True, False]) +@pytest.mark.parametrize( + "naming_convention", + [ + "duck_case", + "snake_case", + "sql_cs_v1", + ], +) +def test_default_schema_name( + destination_config: DestinationTestConfiguration, + use_single_dataset: bool, + naming_convention: str, +) -> None: + os.environ["SCHEMA__NAMING"] = naming_convention destination_config.setup() dataset_name = "dataset_" + uniq_id() - data = ["a", "b", "c"] + data = [ + {"id": idx, "CamelInfo": uniq_id(), "GEN_ERIC": alpha} + for idx, alpha in [(0, "A"), (0, "B"), (0, "C")] + ] p = dlt.pipeline( "test_default_schema_name", @@ -149,16 +167,25 @@ def test_default_schema_name(destination_config: DestinationTestConfiguration) - staging=destination_config.staging, dataset_name=dataset_name, ) + p.config.use_single_dataset = use_single_dataset p.extract(data, table_name="test", schema=Schema("default")) p.normalize() info = p.load() + print(info) # try to restore pipeline r_p = dlt.attach("test_default_schema_name", TEST_STORAGE_ROOT) schema = r_p.default_schema assert schema.name == "default" - assert_table(p, "test", data, info=info) + # check if dlt ables have exactly the required schemas + # TODO: uncomment to check dlt tables schemas + # assert ( + # r_p.default_schema.tables[PIPELINE_STATE_TABLE_NAME]["columns"] + # == pipeline_state_table()["columns"] + # ) + + # assert_table(p, "test", data, info=info) @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 14ae7fa814..6ddc43bab2 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -6,6 +6,7 @@ import dlt from dlt.common import pendulum +from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.schema.schema import Schema, utils from dlt.common.schema.utils import normalize_table_identifiers from dlt.common.utils import uniq_id @@ -199,7 +200,6 @@ def test_silently_skip_on_invalid_credentials( [ "tests.common.cases.normalizers.title_case", "snake_case", - "tests.common.cases.normalizers.sql_upper", ], ) def test_get_schemas_from_destination( @@ -213,6 +213,7 @@ def test_get_schemas_from_destination( dataset_name = "state_test_" + uniq_id() p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) + assert_naming_to_caps(destination_config.destination, p.destination.capabilities()) p.config.use_single_dataset = use_single_dataset def _make_dn_name(schema_name: str) -> str: @@ -287,7 +288,10 @@ def _make_dn_name(schema_name: str) -> str: @pytest.mark.parametrize( "destination_config", destinations_configs( - default_sql_configs=True, default_vector_configs=True, all_buckets_filesystem_configs=True + default_sql_configs=True, + all_staging_configs=True, + default_vector_configs=True, + all_buckets_filesystem_configs=True, ), ids=lambda x: x.name, ) @@ -296,7 +300,6 @@ def _make_dn_name(schema_name: str) -> str: [ "tests.common.cases.normalizers.title_case", "snake_case", - "tests.common.cases.normalizers.sql_upper", ], ) def test_restore_state_pipeline( @@ -308,6 +311,7 @@ def test_restore_state_pipeline( pipeline_name = "pipe_" + uniq_id() dataset_name = "state_test_" + uniq_id() p = 
destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) + assert_naming_to_caps(destination_config.destination, p.destination.capabilities()) def some_data_gen(param: str) -> Any: dlt.current.source_state()[param] = param @@ -735,11 +739,9 @@ def some_data(param: str) -> Any: def prepare_import_folder(p: Pipeline) -> None: - os.makedirs(p._schema_storage.config.import_schema_path, exist_ok=True) - shutil.copy( - common_yml_case_path("schemas/eth/ethereum_schema_v5"), - os.path.join(p._schema_storage.config.import_schema_path, "ethereum.schema.yaml"), - ) + from tests.common.storages.utils import prepare_eth_import_folder + + prepare_eth_import_folder(p._schema_storage) def set_naming_env(destination: str, naming_convention: str) -> None: @@ -752,3 +754,16 @@ def set_naming_env(destination: str, naming_convention: str) -> None: else: naming_convention = "dlt.destinations.impl.weaviate.ci_naming" os.environ["SCHEMA__NAMING"] = naming_convention + + +def assert_naming_to_caps(destination: str, caps: DestinationCapabilitiesContext) -> None: + naming = Schema("test").naming + if ( + not caps.has_case_sensitive_identifiers + and caps.casefold_identifier is not str + and naming.is_case_sensitive + ): + pytest.skip( + f"Skipping for case insensitive destination {destination} with case folding because" + f" naming {naming.name()} is case sensitive" + ) diff --git a/tests/load/test_insert_job_client.py b/tests/load/test_insert_job_client.py index 1c035f7f68..57c3947cca 100644 --- a/tests/load/test_insert_job_client.py +++ b/tests/load/test_insert_job_client.py @@ -14,7 +14,7 @@ from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage from tests.load.pipeline.utils import destinations_configs -DEFAULT_SUBSET = ["duckdb", "redshift", "postgres", "mssql", "synapse"] +DEFAULT_SUBSET = ["duckdb", "redshift", "postgres", "mssql", "synapse", "motherduck"] @pytest.fixture @@ -176,7 +176,6 @@ def test_loading_errors(client: InsertValuesJobClient, file_storage: FileStorage ids=lambda x: x.name, ) def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) -> None: - mocked_caps = client.sql_client.__class__.capabilities writer_type = client.capabilities.insert_values_writer_type insert_sql = prepare_insert_statement(10, writer_type) @@ -185,10 +184,10 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - elif writer_type == "select_union": pre, post, sep = ("SELECT ", "", " UNION ALL\n") + # caps are instance and are attr of sql client instance so it is safe to mock them + client.sql_client.capabilities.max_query_length = 2 # this guarantees that we execute inserts line by line - with patch.object(mocked_caps, "max_query_length", 2), patch.object( - client.sql_client, "execute_fragments" - ) as mocked_fragments: + with patch.object(client.sql_client, "execute_fragments") as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # print(mocked_fragments.mock_calls) @@ -211,9 +210,8 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - # set query length so it reads data until separator ("," or " UNION ALL") (followed by \n) query_length = (idx - start_idx - 1) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object( - client.sql_client, "execute_fragments" - ) as mocked_fragments: + client.sql_client.capabilities.max_query_length = query_length + with 
patch.object(client.sql_client, "execute_fragments") as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on ',' @@ -221,9 +219,8 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - # so it reads until "\n" query_length = (idx - start_idx) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object( - client.sql_client, "execute_fragments" - ) as mocked_fragments: + client.sql_client.capabilities.max_query_length = query_length + with patch.object(client.sql_client, "execute_fragments") as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on separator ("," or " UNION ALL") @@ -235,9 +232,8 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - elif writer_type == "select_union": offset = 1 query_length = (len(insert_sql) - start_idx - offset) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object( - client.sql_client, "execute_fragments" - ) as mocked_fragments: + client.sql_client.capabilities.max_query_length = query_length + with patch.object(client.sql_client, "execute_fragments") as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on ',' @@ -251,22 +247,21 @@ def assert_load_with_max_query( max_query_length: int, ) -> None: # load and check for real - mocked_caps = client.sql_client.__class__.capabilities - with patch.object(mocked_caps, "max_query_length", max_query_length): - user_table_name = prepare_table(client) - insert_sql = prepare_insert_statement( - insert_lines, client.capabilities.insert_values_writer_type - ) - expect_load_file(client, file_storage, insert_sql, user_table_name) - canonical_name = client.sql_client.make_qualified_table_name(user_table_name) - rows_count = client.sql_client.execute_sql(f"SELECT COUNT(1) FROM {canonical_name}")[0][0] - assert rows_count == insert_lines - # get all uniq ids in order - rows = client.sql_client.execute_sql( - f"SELECT _dlt_id FROM {canonical_name} ORDER BY timestamp ASC;" - ) - v_ids = list(map(lambda i: i[0], rows)) - assert list(map(str, range(0, insert_lines))) == v_ids + client.sql_client.capabilities.max_query_length = max_query_length + user_table_name = prepare_table(client) + insert_sql = prepare_insert_statement( + insert_lines, client.capabilities.insert_values_writer_type + ) + expect_load_file(client, file_storage, insert_sql, user_table_name) + canonical_name = client.sql_client.make_qualified_table_name(user_table_name) + rows_count = client.sql_client.execute_sql(f"SELECT COUNT(1) FROM {canonical_name}")[0][0] + assert rows_count == insert_lines + # get all uniq ids in order + rows = client.sql_client.execute_sql( + f"SELECT _dlt_id FROM {canonical_name} ORDER BY timestamp ASC;" + ) + v_ids = list(map(lambda i: i[0], rows)) + assert list(map(str, range(0, insert_lines))) == v_ids client.sql_client.execute_sql(f"DELETE FROM {canonical_name}") diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index ae424babca..7628c6d358 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -122,6 +122,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: ) assert github_schema["engine_version"] == 9 assert "schema_version_hash" in 
github_schema["tables"][LOADS_TABLE_NAME]["columns"] + # print(github_schema["tables"][PIPELINE_STATE_TABLE_NAME]) # load state state_dict = json.loads( test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/state.json") diff --git a/tests/pipeline/utils.py b/tests/pipeline/utils.py index 7affcc5a81..3b4ae33445 100644 --- a/tests/pipeline/utils.py +++ b/tests/pipeline/utils.py @@ -198,7 +198,7 @@ def _load_tables_to_dicts_sql( for table_name in table_names: table_rows = [] columns = schema.get_table_columns(table_name).keys() - query_columns = ",".join(map(p.sql_client().capabilities.escape_identifier, columns)) + query_columns = ",".join(map(p.sql_client().escape_column_name, columns)) with p.sql_client() as c: query_columns = ",".join(map(c.escape_column_name, columns)) From f57e28645fc665da285b933c3b3688ef734656ad Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Fri, 14 Jun 2024 00:02:23 +0200 Subject: [PATCH 060/105] fix conflicts in docs --- .../website/docs/general-usage/destination.md | 10 ++-------- docs/website/docs/general-usage/schema.md | 20 +++---------------- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index 561fbe80dd..f8e39d2d08 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -86,7 +86,7 @@ private_key = "please set me up!" client_email = "please set me up!" ``` -You can then use this destination in your pipeline as follows: +You can then use this destination in your pipeline as follows: ```py import dlt from dlt.common.destination import Destination @@ -117,23 +117,17 @@ Obviously, dlt will access the destination when you instantiate [sql_client](../ ::: -<<<<<<< HEAD -## Declare external destination -You can implement [your own destination](../walkthroughs/create-new-destination.md) and pass the destination class type or instance to `dlt` pipeline. - ## Control how dlt creates table, column and other identifiers - case folding - case sensitivity - +(TODO) 1. Redshift - always lower case, no matter which naming convention used. case insensitive 2. Athena - always lower case, no matter which naming convention used. uses different catalogue and query engines that are incompatible -======= ## Create new destination You have two ways to implement a new destination: 1. You can use `@dlt.destination` decorator and [implement a sink function](../dlt-ecosystem/destinations/destination.md). This is perfect way to implement reverse ETL destinations that push data back to REST APIs. 2. You can implement [a full destination](../walkthroughs/create-new-destination.md) where you have a full control over load jobs and schema migration. ->>>>>>> devel diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index c83298c43b..0d41483ca1 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -36,19 +36,15 @@ the order is lost. ## Naming convention -<<<<<<< HEAD -Each schema contains [naming convention](naming-convention.md) that tells `dlt` how to translate identifiers to the -namespace that the destination understands. This convention can be configured, changed in code or enforced via -destination. -======= `dlt` creates tables, child tables and column schemas from the data. The data being loaded, typically JSON documents, contains identifiers (i.e. 
key names in a dictionary) with any Unicode characters, any lengths and naming styles. On the other hand the destinations accept very strict namespaces for their identifiers. Like Redshift that accepts case-insensitive alphanumeric identifiers with maximum 127 characters. -Each schema contains `naming convention` that tells `dlt` how to translate identifiers to the -namespace that the destination understands. +Each schema contains [naming convention](naming-convention.md) that tells `dlt` how to translate identifiers to the +namespace that the destination understands. This convention can be configured, changed in code or enforced via +destination. The default naming convention: @@ -81,7 +77,6 @@ Opting for `"direct"` naming bypasses most name normalization processes. This me The naming convention is configurable and users can easily create their own conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e. DuckDB). ->>>>>>> devel ## Data normalizer @@ -228,14 +223,9 @@ and columns are inferred from data. For example you can assign **primary_key** h ### Data type autodetectors You can define a set of functions that will be used to infer the data type of the column from a -<<<<<<< HEAD value. The functions are run from top to bottom on the lists. Look in `detections.py` to see what is available. **iso_timestamp** detector that looks for ISO 8601 strings and converts them to **timestamp** is enabled by default. -======= -value. The functions are run from top to bottom on the lists. Look in [`detections.py`](https://github.com/dlt-hub/dlt/blob/devel/dlt/common/schema/detections.py) to see what is -available. ->>>>>>> devel ```yaml settings: @@ -411,10 +401,6 @@ def textual(nesting_level: int): schema.remove_type_detection("iso_timestamp") # convert UNIX timestamp (float, withing a year from NOW) into timestamp schema.add_type_detection("timestamp") -<<<<<<< HEAD -======= - schema._compile_settings() ->>>>>>> devel return dlt.resource([]) ``` From cf50bd4aeaf24702677adf18d272a0afd3c3155d Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 22:33:38 +0200 Subject: [PATCH 061/105] adjusts capabilities in capabilities() method, uses config and naming optionally --- dlt/common/destination/reference.py | 73 ++++++++++++++++++++++++----- dlt/common/normalizers/utils.py | 25 ---------- 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 1d97dd20f0..110c9271a0 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -25,6 +25,7 @@ import inspect from dlt.common import logger +from dlt.common.configuration.specs.base_configuration import extract_inner_hint from dlt.common.destination.utils import verify_schema_capabilities from dlt.common.normalizers.naming import NamingConvention from dlt.common.schema import Schema, TTableSchema, TSchemaTables @@ -44,6 +45,7 @@ from dlt.common.storages import FileStorage from dlt.common.storages.load_storage import ParsedLoadJobFileName from dlt.common.storages.load_package import LoadJobInfo +from dlt.common.typing import get_all_types_of_class_in_union TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") @@ -93,6 +95,10 @@ def __str__(self) -> str: def on_resolved(self) -> None: self.destination_name = self.destination_name or self.destination_type + 
@classmethod + def credentials_type(cls) -> Type[CredentialsConfiguration]: + return extract_inner_hint(cls.get_resolvable_fields()["credentials"]) + @configspec class DestinationClientDwhConfiguration(DestinationClientConfiguration): @@ -413,7 +419,10 @@ class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): with credentials and other config params. """ - config_params: Optional[Dict[str, Any]] = None + config_params: Dict[str, Any] + """Explicit config params, overriding any injected or default values.""" + caps_params: Dict[str, Any] + """Explicit capabilities params, overriding any default values for this destination""" def __init__(self, **kwargs: Any) -> None: # Create initial unresolved destination config @@ -421,9 +430,27 @@ def __init__(self, **kwargs: Any) -> None: # to supersede config from the environment or pipeline args sig = inspect.signature(self.__class__.__init__) params = sig.parameters - self.config_params = { - k: v for k, v in kwargs.items() if k not in params or v != params[k].default - } + + # get available args + spec = self.spec + spec_fields = spec.get_resolvable_fields() + caps_fields = DestinationCapabilitiesContext.get_resolvable_fields() + + # remove default kwargs + kwargs = {k: v for k, v in kwargs.items() if k not in params or v != params[k].default} + + # warn on unknown params + for k in list(kwargs): + if k not in spec_fields and k not in caps_fields: + logger.warning( + f"When initializing destination factory of type {self.destination_type}," + f" argument {k} is not a valid field in {spec.__name__} or destination" + " capabilities" + ) + kwargs.pop(k) + + self.config_params = {k: v for k, v in kwargs.items() if k in spec_fields} + self.caps_params = {k: v for k, v in kwargs.items() if k in caps_fields} @property @abstractmethod @@ -431,9 +458,30 @@ def spec(self) -> Type[TDestinationConfig]: """A spec of destination configuration that also contains destination credentials""" ... + def capabilities( + self, config: Optional[TDestinationConfig] = None, naming: Optional[NamingConvention] = None + ) -> DestinationCapabilitiesContext: + """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc. + Explicit caps arguments passed to the factory init and stored in `caps_params` are applied. + + If `config` is provided, it is used to adjust the capabilities, otherwise the explicit config composed just of `config_params` passed + to factory init is applied + If `naming` is provided, the case sensitivity and case folding are adjusted. + """ + caps = self._raw_capabilities() + caps.update(self.caps_params) + # get explicit config if final config not passed + if config is None: + # create mock credentials to avoid credentials being resolved + credentials = self.spec.credentials_type()() + credentials.__is_resolved__ = True + config = self.spec(credentials=credentials) + config = self.configuration(config, accept_partial=True) + return self.adjust_capabilities(caps, config, naming) + @abstractmethod - def capabilities(self) -> DestinationCapabilitiesContext: - """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + """Returns raw capabilities, before being adjusted with naming convention and config""" ... 
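# A minimal usage sketch (not part of the diff above) of the config_params / caps_params
# split introduced here: explicit factory kwargs land either in the config spec or in the
# capabilities, and capabilities() can be computed from a partial config with mock
# credentials (accept_partial=True), without touching the destination. The bigquery
# factory and its has_case_sensitive_identifiers argument come from this patch series;
# the concrete values below are illustrative only.
from dlt.destinations import bigquery

dest = bigquery(has_case_sensitive_identifiers=False, location="EU")
caps = dest.capabilities()  # no credentials required for capability resolution
# expected to hold with the changes in this series: the config flag is copied to caps
assert caps.has_case_sensitive_identifiers is False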
@property @@ -456,7 +504,9 @@ def client_class(self) -> Type[TDestinationClient]: """A job client class responsible for starting and resuming load jobs""" ... - def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfig: + def configuration( + self, initial_config: TDestinationConfig, accept_partial: bool = False + ) -> TDestinationConfig: """Get a fully resolved destination config from the initial config""" config = resolve_configuration( @@ -464,6 +514,7 @@ def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfi sections=(known_sections.DESTINATION, self.destination_name), # Already populated values will supersede resolved env config explicit_value=self.config_params, + accept_partial=accept_partial, ) return config @@ -472,18 +523,18 @@ def client( ) -> TDestinationClient: """Returns a configured instance of the destination's job client""" config = self.configuration(initial_config) - caps = self.adjust_capabilities(self.capabilities(), config, schema.naming) - return self.client_class(schema, config, caps) + return self.client_class(schema, config, self.capabilities(config, schema.naming)) @classmethod def adjust_capabilities( cls, caps: DestinationCapabilitiesContext, config: TDestinationConfig, - naming: NamingConvention, + naming: Optional[NamingConvention], ) -> DestinationCapabilitiesContext: """Adjust the capabilities to match the case sensitivity as requested by naming convention.""" - if not naming.is_case_sensitive: + # if naming not provided, skip the adjustment + if not naming or not naming.is_case_sensitive: # all destinations are configured to be case insensitive so there's nothing to adjust return caps if not caps.has_case_sensitive_identifiers: diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index 24f70cc327..a13b517844 100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -63,31 +63,6 @@ def naming_from_reference( (3) a type name which is a module containing `NamingConvention` attribute (4) a type of class deriving from NamingConvention """ - # try: - # if "." in names: - # # TODO: bump schema engine version and migrate schema. also change the name in TNormalizersConfig from names to naming - # if names == "dlt.common.normalizers.names.snake_case": - # names = DEFAULT_NAMING_MODULE - # # this is full module name - # naming_module = cast(SupportsNamingConvention, import_module(names)) - # else: - # # from known location - # naming_module = cast( - # SupportsNamingConvention, import_module(f"dlt.common.normalizers.naming.{names}") - # ) - # except ImportError: - # raise UnknownNamingModule(names) - # if not hasattr(naming_module, "NamingConvention"): - # raise InvalidNamingModule(names) - # # get max identifier length - # if destination_capabilities: - # max_length = min( - # destination_capabilities.max_identifier_length, - # destination_capabilities.max_column_identifier_length, - # ) - # else: - # max_length = None - def _import_naming(module: str, cls: str) -> Type[NamingConvention]: if "." in module or cls != "NamingConvention": # TODO: bump schema engine version and migrate schema. 
also change the name in TNormalizersConfig from names to naming

From 72969ce6b88c9d940489b49de5f4fe2362f5ccbc Mon Sep 17 00:00:00 2001
From: Marcin Rudolf
Date: Sat, 15 Jun 2024 22:35:25 +0200
Subject: [PATCH 062/105] allows to add props to classes without vectorizer in weaviate

---
 .../impl/weaviate/weaviate_client.py | 23 ++++++++++++++-----
 tests/load/weaviate/test_pipeline.py | 21 ++++++++++++-----
 tests/load/weaviate/utils.py | 7 +++---
 3 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py
index 8d51c62af3..8f4bdf4db7 100644
--- a/dlt/destinations/impl/weaviate/weaviate_client.py
+++ b/dlt/destinations/impl/weaviate/weaviate_client.py
@@ -473,8 +473,11 @@ def _execute_schema_update(self, only_tables: Iterable[str]) -> None:
             logger.info(f"Found {len(new_columns)} updates for {table_name} in {self.schema.name}")
             if len(new_columns) > 0:
                 if exists:
+                    is_collection_vectorized = self._is_collection_vectorized(table_name)
                     for column in new_columns:
-                        prop = self._make_property_schema(column["name"], column)
+                        prop = self._make_property_schema(
+                            column["name"], column, is_collection_vectorized
+                        )
                         self.create_class_property(table_name, prop)
                 else:
                     class_schema = self.make_weaviate_class_schema(table_name)
@@ -625,31 +628,39 @@ def make_weaviate_class_schema(self, table_name: str) -> Dict[str, Any]:
         }

         # check if any column requires vectorization
-        if get_columns_names_with_prop(self.schema.get_table(table_name), VECTORIZE_HINT):
+        if self._is_collection_vectorized(table_name):
             class_schema.update(self._vectorizer_config)
         else:
             class_schema.update(NON_VECTORIZED_CLASS)
         return class_schema

+    def _is_collection_vectorized(self, table_name: str) -> bool:
+        """Tells if any of the columns has the vectorize hint set"""
+        return (
+            len(get_columns_names_with_prop(self.schema.get_table(table_name), VECTORIZE_HINT)) > 0
+        )
+
     def _make_properties(self, table_name: str) -> List[Dict[str, Any]]:
         """Creates a Weaviate properties schema from a table schema.
Args: table: The table name for which columns should be converted to properties """ - + is_collection_vectorized = self._is_collection_vectorized(table_name) return [ - self._make_property_schema(column_name, column) + self._make_property_schema(column_name, column, is_collection_vectorized) for column_name, column in self.schema.get_table_columns(table_name).items() ] - def _make_property_schema(self, column_name: str, column: TColumnSchema) -> Dict[str, Any]: + def _make_property_schema( + self, column_name: str, column: TColumnSchema, is_collection_vectorized: bool + ) -> Dict[str, Any]: extra_kv = {} vectorizer_name = self._vectorizer_config["vectorizer"] # x-weaviate-vectorize: (bool) means that this field should be vectorized - if not column.get(VECTORIZE_HINT, False): + if is_collection_vectorized and not column.get(VECTORIZE_HINT, False): # tell weaviate explicitly to not vectorize when column has no vectorize hint extra_kv["moduleConfig"] = { vectorizer_name: { diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py index 507a4c4f8d..45195e86bc 100644 --- a/tests/load/weaviate/test_pipeline.py +++ b/tests/load/weaviate/test_pipeline.py @@ -10,7 +10,7 @@ ) from dlt.common.utils import uniq_id -from dlt.destinations.impl.weaviate import weaviate_adapter +from dlt.destinations.adapters import weaviate_adapter from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient @@ -248,7 +248,8 @@ def movies_data(): assert_class(pipeline, "MoviesData", items=data) -def test_pipeline_with_schema_evolution(): +@pytest.mark.parametrize("vectorized", (True, False), ids=("vectorized", "not-vectorized")) +def test_pipeline_with_schema_evolution(vectorized: bool): data = [ { "doc_id": 1, @@ -264,7 +265,8 @@ def test_pipeline_with_schema_evolution(): def some_data(): yield data - weaviate_adapter(some_data, vectorize=["content"]) + if vectorized: + weaviate_adapter(some_data, vectorize=["content"]) pipeline = dlt.pipeline( pipeline_name="test_pipeline_append", @@ -284,17 +286,22 @@ def some_data(): "doc_id": 3, "content": "3", "new_column": "new", + "new_vec_column": "lorem lorem", }, { "doc_id": 4, "content": "4", "new_column": "new", + "new_vec_column": "lorem lorem", }, ] - pipeline.run( - some_data(), - ) + some_data_2 = some_data() + + if vectorized: + weaviate_adapter(some_data_2, vectorize=["new_vec_column"]) + + pipeline.run(some_data_2) table_schema = pipeline.default_schema.tables["SomeData"] assert "new_column" in table_schema["columns"] @@ -302,6 +309,8 @@ def some_data(): aggregated_data.extend(data) aggregated_data[0]["new_column"] = None aggregated_data[1]["new_column"] = None + aggregated_data[0]["new_vec_column"] = None + aggregated_data[1]["new_vec_column"] = None assert_class(pipeline, "SomeData", items=aggregated_data) diff --git a/tests/load/weaviate/utils.py b/tests/load/weaviate/utils.py index 650666c8ab..b391c2fa38 100644 --- a/tests/load/weaviate/utils.py +++ b/tests/load/weaviate/utils.py @@ -38,9 +38,10 @@ def assert_class( # make sure expected columns are vectorized for column_name, column in columns.items(): prop = properties[column_name] - assert prop["moduleConfig"][vectorizer_name]["skip"] == ( - not column.get(VECTORIZE_HINT, False) - ) + if client._is_collection_vectorized(class_name): + assert prop["moduleConfig"][vectorizer_name]["skip"] == ( + not 
column.get(VECTORIZE_HINT, False) + ) # tokenization if TOKENIZATION_HINT in column: assert prop["tokenization"] == column[TOKENIZATION_HINT] # type: ignore[literal-required] From 656d5fcc3141caf9234e28c05c21a231908f87d9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 22:42:39 +0200 Subject: [PATCH 063/105] moves caps function into factories, cleansup adapters and custom destination --- dlt/destinations/adapters.py | 13 ++-- dlt/destinations/impl/athena/__init__.py | 38 ------------ dlt/destinations/impl/athena/factory.py | 41 +++++++++++-- dlt/destinations/impl/bigquery/__init__.py | 33 ----------- dlt/destinations/impl/bigquery/bigquery.py | 1 - dlt/destinations/impl/bigquery/factory.py | 38 ++++++++++-- dlt/destinations/impl/bigquery/sql_client.py | 1 - dlt/destinations/impl/clickhouse/__init__.py | 58 ------------------ .../impl/clickhouse/clickhouse.py | 1 - dlt/destinations/impl/clickhouse/factory.py | 56 +++++++++++++++++- .../impl/clickhouse/sql_client.py | 1 - dlt/destinations/impl/databricks/__init__.py | 32 ---------- .../impl/databricks/databricks.py | 1 - dlt/destinations/impl/databricks/factory.py | 32 +++++++++- dlt/destinations/impl/destination/__init__.py | 16 ----- .../impl/destination/configuration.py | 21 ++++++- .../impl/destination/destination.py | 3 +- dlt/destinations/impl/destination/factory.py | 44 ++++++++------ dlt/destinations/impl/dremio/dremio.py | 1 - dlt/destinations/impl/dremio/factory.py | 33 +++++++++-- dlt/destinations/impl/dremio/sql_client.py | 2 +- dlt/destinations/impl/duckdb/__init__.py | 28 --------- dlt/destinations/impl/duckdb/duck.py | 1 - dlt/destinations/impl/duckdb/factory.py | 29 ++++++++- dlt/destinations/impl/duckdb/sql_client.py | 1 - dlt/destinations/impl/dummy/__init__.py | 40 ------------- dlt/destinations/impl/dummy/dummy.py | 1 - dlt/destinations/impl/dummy/factory.py | 34 ++++++++++- dlt/destinations/impl/filesystem/__init__.py | 24 -------- dlt/destinations/impl/filesystem/factory.py | 27 +++++++-- .../impl/filesystem/filesystem.py | 1 - dlt/destinations/impl/motherduck/__init__.py | 26 -------- dlt/destinations/impl/motherduck/factory.py | 27 ++++++++- .../impl/motherduck/motherduck.py | 2 - dlt/destinations/impl/mssql/__init__.py | 33 ----------- dlt/destinations/impl/mssql/factory.py | 36 +++++++++-- dlt/destinations/impl/mssql/mssql.py | 6 +- dlt/destinations/impl/mssql/sql_client.py | 2 - dlt/destinations/impl/postgres/__init__.py | 32 ---------- dlt/destinations/impl/postgres/factory.py | 32 +++++++++- dlt/destinations/impl/postgres/postgres.py | 1 - dlt/destinations/impl/postgres/sql_client.py | 1 - dlt/destinations/impl/qdrant/__init__.py | 18 ------ dlt/destinations/impl/qdrant/factory.py | 17 +++++- dlt/destinations/impl/qdrant/qdrant_client.py | 2 +- dlt/destinations/impl/redshift/__init__.py | 30 ---------- dlt/destinations/impl/redshift/factory.py | 32 ++++++++-- dlt/destinations/impl/snowflake/__init__.py | 30 ---------- dlt/destinations/impl/snowflake/factory.py | 33 +++++++++-- dlt/destinations/impl/snowflake/snowflake.py | 1 - dlt/destinations/impl/snowflake/sql_client.py | 1 - dlt/destinations/impl/synapse/__init__.py | 59 ------------------- dlt/destinations/impl/synapse/factory.py | 58 ++++++++++++++++-- dlt/destinations/impl/synapse/sql_client.py | 1 - dlt/destinations/impl/weaviate/__init__.py | 24 -------- dlt/destinations/impl/weaviate/factory.py | 23 +++++++- .../pdf_to_weaviate/pdf_to_weaviate.py | 2 +- .../docs/dlt-ecosystem/destinations/athena.md | 4 +- 
.../dlt-ecosystem/destinations/bigquery.md | 2 +- .../dlt-ecosystem/destinations/synapse.md | 2 + .../performance_snippets/toml-snippets.toml | 2 +- 61 files changed, 550 insertions(+), 641 deletions(-) diff --git a/dlt/destinations/adapters.py b/dlt/destinations/adapters.py index 1c3e094e19..42d4879653 100644 --- a/dlt/destinations/adapters.py +++ b/dlt/destinations/adapters.py @@ -1,11 +1,11 @@ """This module collects all destination adapters present in `impl` namespace""" -from dlt.destinations.impl.weaviate import weaviate_adapter -from dlt.destinations.impl.qdrant import qdrant_adapter -from dlt.destinations.impl.bigquery import bigquery_adapter -from dlt.destinations.impl.synapse import synapse_adapter -from dlt.destinations.impl.clickhouse import clickhouse_adapter -from dlt.destinations.impl.athena import athena_adapter +from dlt.destinations.impl.weaviate.weaviate_adapter import weaviate_adapter +from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter +from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter +from dlt.destinations.impl.synapse.synapse_adapter import synapse_adapter +from dlt.destinations.impl.clickhouse.clickhouse_adapter import clickhouse_adapter +from dlt.destinations.impl.athena.athena_adapter import athena_adapter, athena_partition __all__ = [ "weaviate_adapter", @@ -14,4 +14,5 @@ "synapse_adapter", "clickhouse_adapter", "athena_adapter", + "athena_partition", ] diff --git a/dlt/destinations/impl/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py index f971b5e90a..e69de29bb2 100644 --- a/dlt/destinations/impl/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -1,38 +0,0 @@ -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.data_writers.escape import ( - escape_athena_identifier, - format_bigquery_datetime_literal, -) -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - # athena only supports loading from staged files on s3 for now - caps.preferred_loader_file_format = None - caps.supported_loader_file_formats = [] - caps.supported_table_formats = ["iceberg"] - caps.preferred_staging_file_format = "parquet" - caps.supported_staging_file_formats = ["parquet", "jsonl"] - # athena is storing all identifiers in lower case and is case insensitive - # it also uses lower case in all the queries - # https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html - caps.escape_identifier = escape_athena_identifier - caps.casefold_identifier = str.lower - caps.has_case_sensitive_identifiers = False - caps.format_datetime_literal = format_bigquery_datetime_literal - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 255 - caps.max_column_identifier_length = 255 - caps.max_query_length = 16 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 262144 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = False - caps.supports_transactions = False - caps.alter_add_multi_column = True - caps.schema_supports_numeric_precision = False - caps.timestamp_precision = 3 - caps.supports_truncate_command = False - return caps diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py index 5b37607cca..d4c29a641f 100644 --- 
a/dlt/destinations/impl/athena/factory.py +++ b/dlt/destinations/impl/athena/factory.py @@ -1,9 +1,14 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext -from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration from dlt.common.configuration.specs import AwsCredentials -from dlt.destinations.impl.athena import capabilities +from dlt.common.data_writers.escape import ( + escape_athena_identifier, + format_bigquery_datetime_literal, +) +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE + +from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration if t.TYPE_CHECKING: from dlt.destinations.impl.athena.athena import AthenaClient @@ -12,8 +17,36 @@ class athena(Destination[AthenaClientConfiguration, "AthenaClient"]): spec = AthenaClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + # athena only supports loading from staged files on s3 for now + caps.preferred_loader_file_format = None + caps.supported_loader_file_formats = [] + caps.supported_table_formats = ["iceberg"] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["parquet", "jsonl"] + # athena is storing all identifiers in lower case and is case insensitive + # it also uses lower case in all the queries + # https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html + caps.escape_identifier = escape_athena_identifier + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = False + caps.format_datetime_literal = format_bigquery_datetime_literal + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 255 + caps.max_column_identifier_length = 255 + caps.max_query_length = 16 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 262144 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = False + caps.supports_transactions = False + caps.alter_add_multi_column = True + caps.schema_supports_numeric_precision = False + caps.timestamp_precision = 3 + caps.supports_truncate_command = False + return caps @property def client_class(self) -> t.Type["AthenaClient"]: diff --git a/dlt/destinations/impl/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py index 120d487757..e69de29bb2 100644 --- a/dlt/destinations/impl/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -1,33 +0,0 @@ -from dlt.common.data_writers.escape import escape_hive_identifier, format_bigquery_datetime_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl", "parquet"] - caps.preferred_staging_file_format = "parquet" - caps.supported_staging_file_formats = ["parquet", "jsonl"] - # BigQuery is by default case sensitive but that cannot be turned off for a dataset - # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity - caps.escape_identifier = escape_hive_identifier - 
caps.escape_literal = None - caps.has_case_sensitive_identifiers = True - caps.casefold_identifier = str - # BQ limit is 4GB but leave a large headroom since buffered writer does not preemptively check size - caps.recommended_file_size = int(1024 * 1024 * 1024) - caps.format_datetime_literal = format_bigquery_datetime_literal - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (76, 38) - caps.max_identifier_length = 1024 - caps.max_column_identifier_length = 300 - caps.max_query_length = 1024 * 1024 - caps.is_max_query_length_in_bytes = False - caps.max_text_data_type_length = 10 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = False - caps.supports_clone_table = True - caps.schema_supports_numeric_precision = False # no precision information in BigQuery - - return caps diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index 8d870487f5..d81b7e2c8e 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -35,7 +35,6 @@ LoadJobNotExistsException, LoadJobTerminalException, ) -from dlt.destinations.impl.bigquery import capabilities from dlt.destinations.impl.bigquery.bigquery_adapter import ( PARTITION_HINT, CLUSTER_HINT, diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py index 8d4da4cfc8..db61a6042a 100644 --- a/dlt/destinations/impl/bigquery/factory.py +++ b/dlt/destinations/impl/bigquery/factory.py @@ -1,11 +1,13 @@ import typing as t from dlt.common.normalizers.naming import NamingConvention -from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.common.configuration.specs import GcpServiceAccountCredentials -from dlt.destinations.impl.bigquery import capabilities +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.data_writers.escape import escape_hive_identifier, format_bigquery_datetime_literal from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration + if t.TYPE_CHECKING: from dlt.destinations.impl.bigquery.bigquery import BigQueryClient @@ -14,8 +16,34 @@ class bigquery(Destination[BigQueryClientConfiguration, "BigQueryClient"]): spec = BigQueryClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl", "parquet"] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["parquet", "jsonl"] + # BigQuery is by default case sensitive but that cannot be turned off for a dataset + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity + caps.escape_identifier = escape_hive_identifier + caps.escape_literal = None + caps.has_case_sensitive_identifiers = True + caps.casefold_identifier = str + # BQ limit is 4GB but leave a large headroom since buffered writer does not preemptively check size + caps.recommended_file_size = int(1024 * 1024 * 1024) + caps.format_datetime_literal = format_bigquery_datetime_literal + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (76, 38) + caps.max_identifier_length 
= 1024 + caps.max_column_identifier_length = 300 + caps.max_query_length = 1024 * 1024 + caps.is_max_query_length_in_bytes = False + caps.max_text_data_type_length = 10 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = False + caps.supports_clone_table = True + caps.schema_supports_numeric_precision = False # no precision information in BigQuery + + return caps @property def client_class(self) -> t.Type["BigQueryClient"]: @@ -57,7 +85,7 @@ def adjust_capabilities( cls, caps: DestinationCapabilitiesContext, config: BigQueryClientConfiguration, - naming: NamingConvention, + naming: t.Optional[NamingConvention], ) -> DestinationCapabilitiesContext: # modify the caps if case sensitive identifiers are requested caps.has_case_sensitive_identifiers = config.has_case_sensitive_identifiers diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index c178ebf419..45e9379af5 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -17,7 +17,6 @@ DatabaseTransientException, DatabaseUndefinedRelation, ) -from dlt.destinations.impl.bigquery import capabilities from dlt.destinations.sql_client import ( DBApiCursorImpl, SqlClientBase, diff --git a/dlt/destinations/impl/clickhouse/__init__.py b/dlt/destinations/impl/clickhouse/__init__.py index 9594ee0b9e..e69de29bb2 100644 --- a/dlt/destinations/impl/clickhouse/__init__.py +++ b/dlt/destinations/impl/clickhouse/__init__.py @@ -1,58 +0,0 @@ -import sys - -from dlt.common.pendulum import pendulum -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.common.data_writers.escape import ( - escape_clickhouse_identifier, - escape_clickhouse_literal, - format_clickhouse_datetime_literal, -) -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.impl.clickhouse.clickhouse_adapter import clickhouse_adapter - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["parquet", "jsonl"] - caps.preferred_staging_file_format = "jsonl" - caps.supported_staging_file_formats = ["parquet", "jsonl"] - - caps.format_datetime_literal = format_clickhouse_datetime_literal - caps.escape_identifier = escape_clickhouse_identifier - caps.escape_literal = escape_clickhouse_literal - # docs are very unclear https://clickhouse.com/docs/en/sql-reference/syntax - # taking into account other sources: identifiers are case sensitive - caps.has_case_sensitive_identifiers = True - # and store as is in the information schema - caps.casefold_identifier = str - - # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified - caps.max_identifier_length = 255 - caps.max_column_identifier_length = 255 - - # ClickHouse has no max `String` type length. - caps.max_text_data_type_length = sys.maxsize - - caps.schema_supports_numeric_precision = True - # Use 'Decimal128' with these defaults. - # https://clickhouse.com/docs/en/sql-reference/data-types/decimal - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - # Use 'Decimal256' with these defaults. 
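# Illustrative sketch, not part of this patch: the pattern applied throughout this
# change is that each module-level capabilities() helper becomes the factory's
# _raw_capabilities() (static, config-independent defaults), while config-dependent
# tweaks such as case sensitivity move to adjust_capabilities(). SketchConfig below
# is a hypothetical stand-in for a destination configuration.
from dataclasses import dataclass

from dlt.common.destination import DestinationCapabilitiesContext


@dataclass
class SketchConfig:
    has_case_sensitive_identifiers: bool = False


def raw_capabilities() -> DestinationCapabilitiesContext:
    # static defaults, analogous to the _raw_capabilities() methods in this patch
    caps = DestinationCapabilitiesContext()
    caps.preferred_loader_file_format = "jsonl"
    caps.has_case_sensitive_identifiers = False
    caps.casefold_identifier = str
    return caps


def adjust_capabilities(
    caps: DestinationCapabilitiesContext, config: SketchConfig
) -> DestinationCapabilitiesContext:
    # config-dependent part, mirroring the bigquery/mssql/redshift factories
    caps.has_case_sensitive_identifiers = config.has_case_sensitive_identifiers
    return caps


caps = adjust_capabilities(raw_capabilities(), SketchConfig(has_case_sensitive_identifiers=True))
assert caps.has_case_sensitive_identifiers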
- caps.wei_precision = (76, 0) - caps.timestamp_precision = 6 - - # https://clickhouse.com/docs/en/operations/settings/settings#max_query_size - caps.is_max_query_length_in_bytes = True - caps.max_query_length = 262144 - - # ClickHouse has limited support for transactional semantics, especially for `ReplicatedMergeTree`, - # the default ClickHouse Cloud engine. It does, however, provide atomicity for individual DDL operations like `ALTER TABLE`. - # https://clickhouse-driver.readthedocs.io/en/latest/dbapi.html#clickhouse_driver.dbapi.connection.Connection.commit - # https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback - caps.supports_transactions = False - caps.supports_ddl_transactions = False - - caps.supports_truncate_command = True - - return caps diff --git a/dlt/destinations/impl/clickhouse/clickhouse.py b/dlt/destinations/impl/clickhouse/clickhouse.py index 3696c5036c..6dd8fd47ed 100644 --- a/dlt/destinations/impl/clickhouse/clickhouse.py +++ b/dlt/destinations/impl/clickhouse/clickhouse.py @@ -36,7 +36,6 @@ ) from dlt.common.storages import FileStorage from dlt.destinations.exceptions import LoadJobTerminalException -from dlt.destinations.impl.clickhouse import capabilities from dlt.destinations.impl.clickhouse.clickhouse_adapter import ( TTableEngineType, TABLE_ENGINE_TYPE_HINT, diff --git a/dlt/destinations/impl/clickhouse/factory.py b/dlt/destinations/impl/clickhouse/factory.py index e5b8fc0e6a..52a1694dee 100644 --- a/dlt/destinations/impl/clickhouse/factory.py +++ b/dlt/destinations/impl/clickhouse/factory.py @@ -1,7 +1,14 @@ +import sys import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext -from dlt.destinations.impl.clickhouse import capabilities +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.data_writers.escape import ( + escape_clickhouse_identifier, + escape_clickhouse_literal, + format_clickhouse_datetime_literal, +) + from dlt.destinations.impl.clickhouse.configuration import ( ClickHouseClientConfiguration, ClickHouseCredentials, @@ -16,8 +23,51 @@ class clickhouse(Destination[ClickHouseClientConfiguration, "ClickHouseClient"]): spec = ClickHouseClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["parquet", "jsonl"] + caps.preferred_staging_file_format = "jsonl" + caps.supported_staging_file_formats = ["parquet", "jsonl"] + + caps.format_datetime_literal = format_clickhouse_datetime_literal + caps.escape_identifier = escape_clickhouse_identifier + caps.escape_literal = escape_clickhouse_literal + # docs are very unclear https://clickhouse.com/docs/en/sql-reference/syntax + # taking into account other sources: identifiers are case sensitive + caps.has_case_sensitive_identifiers = True + # and store as is in the information schema + caps.casefold_identifier = str + + # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified + caps.max_identifier_length = 255 + caps.max_column_identifier_length = 255 + + # ClickHouse has no max `String` type length. + caps.max_text_data_type_length = sys.maxsize + + caps.schema_supports_numeric_precision = True + # Use 'Decimal128' with these defaults. 
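# Illustrative sketch, not part of this patch: casefold_identifier is just a
# callable, so the three conventions visible in this change are str (keep as is,
# e.g. ClickHouse and BigQuery), str.lower (e.g. Postgres, Redshift, Databricks)
# and str.upper (Snowflake). This only demonstrates the callables themselves;
# how dlt applies them to identifiers is outside this hunk.
for casefold in (str, str.lower, str.upper):
    print(casefold("LoadId"))
# prints: LoadId, loadid, LOADID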
+ # https://clickhouse.com/docs/en/sql-reference/data-types/decimal + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + # Use 'Decimal256' with these defaults. + caps.wei_precision = (76, 0) + caps.timestamp_precision = 6 + + # https://clickhouse.com/docs/en/operations/settings/settings#max_query_size + caps.is_max_query_length_in_bytes = True + caps.max_query_length = 262144 + + # ClickHouse has limited support for transactional semantics, especially for `ReplicatedMergeTree`, + # the default ClickHouse Cloud engine. It does, however, provide atomicity for individual DDL operations like `ALTER TABLE`. + # https://clickhouse-driver.readthedocs.io/en/latest/dbapi.html#clickhouse_driver.dbapi.connection.Connection.commit + # https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback + caps.supports_transactions = False + caps.supports_ddl_transactions = False + + caps.supports_truncate_command = True + + return caps @property def client_class(self) -> t.Type["ClickHouseClient"]: diff --git a/dlt/destinations/impl/clickhouse/sql_client.py b/dlt/destinations/impl/clickhouse/sql_client.py index 2083b17c7c..ee013ea123 100644 --- a/dlt/destinations/impl/clickhouse/sql_client.py +++ b/dlt/destinations/impl/clickhouse/sql_client.py @@ -21,7 +21,6 @@ DatabaseTransientException, DatabaseTerminalException, ) -from dlt.destinations.impl.clickhouse import capabilities from dlt.destinations.impl.clickhouse.configuration import ClickHouseCredentials from dlt.destinations.sql_client import ( DBApiCursorImpl, diff --git a/dlt/destinations/impl/databricks/__init__.py b/dlt/destinations/impl/databricks/__init__.py index e07075b960..e69de29bb2 100644 --- a/dlt/destinations/impl/databricks/__init__.py +++ b/dlt/destinations/impl/databricks/__init__.py @@ -1,32 +0,0 @@ -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.data_writers.escape import escape_databricks_identifier, escape_databricks_literal -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = None - caps.supported_loader_file_formats = [] - caps.preferred_staging_file_format = "parquet" - caps.supported_staging_file_formats = ["jsonl", "parquet"] - caps.escape_identifier = escape_databricks_identifier - # databricks identifiers are case insensitive and stored in lower case - # https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html - caps.escape_literal = escape_databricks_literal - caps.casefold_identifier = str.lower - caps.has_case_sensitive_identifiers = False - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 255 - caps.max_column_identifier_length = 255 - caps.max_query_length = 2 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 16 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = False - caps.supports_truncate_command = True - # caps.supports_transactions = False - caps.alter_add_multi_column = True - caps.supports_multiple_statements = False - caps.supports_clone_table = True - return caps diff --git a/dlt/destinations/impl/databricks/databricks.py b/dlt/destinations/impl/databricks/databricks.py index 9a791a5a6c..62debdedb7 100644 --- 
a/dlt/destinations/impl/databricks/databricks.py +++ b/dlt/destinations/impl/databricks/databricks.py @@ -27,7 +27,6 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.exceptions import LoadJobTerminalException -from dlt.destinations.impl.databricks import capabilities from dlt.destinations.impl.databricks.configuration import DatabricksClientConfiguration from dlt.destinations.impl.databricks.sql_client import DatabricksSqlClient from dlt.destinations.sql_jobs import SqlMergeJob diff --git a/dlt/destinations/impl/databricks/factory.py b/dlt/destinations/impl/databricks/factory.py index 7c6c95137d..56462714c1 100644 --- a/dlt/destinations/impl/databricks/factory.py +++ b/dlt/destinations/impl/databricks/factory.py @@ -1,12 +1,13 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.data_writers.escape import escape_databricks_identifier, escape_databricks_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.destinations.impl.databricks.configuration import ( DatabricksCredentials, DatabricksClientConfiguration, ) -from dlt.destinations.impl.databricks import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.databricks.databricks import DatabricksClient @@ -15,8 +16,33 @@ class databricks(Destination[DatabricksClientConfiguration, "DatabricksClient"]): spec = DatabricksClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = None + caps.supported_loader_file_formats = [] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["jsonl", "parquet"] + caps.escape_identifier = escape_databricks_identifier + # databricks identifiers are case insensitive and stored in lower case + # https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html + caps.escape_literal = escape_databricks_literal + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 255 + caps.max_column_identifier_length = 255 + caps.max_query_length = 2 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 16 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = False + caps.supports_truncate_command = True + # caps.supports_transactions = False + caps.alter_add_multi_column = True + caps.supports_multiple_statements = False + caps.supports_clone_table = True + return caps @property def client_class(self) -> t.Type["DatabricksClient"]: diff --git a/dlt/destinations/impl/destination/__init__.py b/dlt/destinations/impl/destination/__init__.py index f985119f26..e69de29bb2 100644 --- a/dlt/destinations/impl/destination/__init__.py +++ b/dlt/destinations/impl/destination/__init__.py @@ -1,16 +0,0 @@ -from typing import Optional -from dlt.common.destination import DestinationCapabilitiesContext, TLoaderFileFormat - - -def capabilities( - preferred_loader_file_format: TLoaderFileFormat = "typed-jsonl", - naming_convention: str = "direct", - max_table_nesting: Optional[int] = 0, -) -> DestinationCapabilitiesContext: - 
caps = DestinationCapabilitiesContext.generic_capabilities(preferred_loader_file_format) - caps.supported_loader_file_formats = ["typed-jsonl", "parquet"] - caps.supports_ddl_transactions = False - caps.supports_transactions = False - caps.naming_convention = naming_convention - caps.max_table_nesting = max_table_nesting - return caps diff --git a/dlt/destinations/impl/destination/configuration.py b/dlt/destinations/impl/destination/configuration.py index c3b677058c..705f3b0bb5 100644 --- a/dlt/destinations/impl/destination/configuration.py +++ b/dlt/destinations/impl/destination/configuration.py @@ -1,20 +1,23 @@ import dataclasses -from typing import Optional, Final, Callable, Union +from typing import Optional, Final, Callable, Union, Any from typing_extensions import ParamSpec -from dlt.common.configuration import configspec +from dlt.common.configuration import configspec, ConfigurationValueError from dlt.common.destination import TLoaderFileFormat from dlt.common.destination.reference import ( DestinationClientConfiguration, ) from dlt.common.typing import TDataItems from dlt.common.schema import TTableSchema -from dlt.common.destination import Destination TDestinationCallable = Callable[[Union[TDataItems, str], TTableSchema], None] TDestinationCallableParams = ParamSpec("TDestinationCallableParams") +def dummy_custom_destination(*args: Any, **kwargs: Any) -> None: + pass + + @configspec class CustomDestinationClientConfiguration(DestinationClientConfiguration): destination_type: Final[str] = dataclasses.field(default="destination", init=False, repr=False, compare=False) # type: ignore @@ -23,3 +26,15 @@ class CustomDestinationClientConfiguration(DestinationClientConfiguration): batch_size: int = 10 skip_dlt_columns_and_tables: bool = True max_table_nesting: Optional[int] = 0 + + def ensure_callable(self) -> None: + """Makes sure that valid callable was provided""" + # TODO: this surely can be done with `on_resolved` + if ( + self.destination_callable is None + or self.destination_callable is dummy_custom_destination + ): + raise ConfigurationValueError( + f"A valid callable was not provided to {self.__class__.__name__}. Did you decorate" + " a function @dlt.destination correctly?" 
+ ) diff --git a/dlt/destinations/impl/destination/destination.py b/dlt/destinations/impl/destination/destination.py index a2fe717d73..c44fd3cca1 100644 --- a/dlt/destinations/impl/destination/destination.py +++ b/dlt/destinations/impl/destination/destination.py @@ -15,8 +15,6 @@ DoNothingJob, JobClientBase, ) - -from dlt.destinations.impl.destination import capabilities from dlt.destinations.impl.destination.configuration import CustomDestinationClientConfiguration from dlt.destinations.job_impl import ( DestinationJsonlLoadJob, @@ -33,6 +31,7 @@ def __init__( config: CustomDestinationClientConfiguration, capabilities: DestinationCapabilitiesContext, ) -> None: + config.ensure_callable() super().__init__(schema, config, capabilities) self.config: CustomDestinationClientConfiguration = config # create pre-resolved callable to avoid multiple config resolutions during execution of the jobs diff --git a/dlt/destinations/impl/destination/factory.py b/dlt/destinations/impl/destination/factory.py index 3ae6f2e876..184e05095d 100644 --- a/dlt/destinations/impl/destination/factory.py +++ b/dlt/destinations/impl/destination/factory.py @@ -4,18 +4,19 @@ from types import ModuleType from dlt.common import logger +from dlt.common.exceptions import TerminalValueError +from dlt.common.normalizers.naming.naming import NamingConvention from dlt.common.typing import AnyFun from dlt.common.destination import Destination, DestinationCapabilitiesContext, TLoaderFileFormat from dlt.common.configuration import known_sections, with_config, get_fun_spec from dlt.common.configuration.exceptions import ConfigurationValueError from dlt.common.utils import get_callable_name, is_inner_callable -from dlt.destinations.exceptions import DestinationTransientException from dlt.destinations.impl.destination.configuration import ( CustomDestinationClientConfiguration, + dummy_custom_destination, TDestinationCallable, ) -from dlt.destinations.impl.destination import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.destination.destination import DestinationClient @@ -34,14 +35,14 @@ class DestinationInfo(t.NamedTuple): class destination(Destination[CustomDestinationClientConfiguration, "DestinationClient"]): - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities( - preferred_loader_file_format=self.config_params.get( - "loader_file_format", "typed-jsonl" - ), - naming_convention=self.config_params.get("naming_convention", "direct"), - max_table_nesting=self.config_params.get("max_table_nesting", None), - ) + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext.generic_capabilities("typed-jsonl") + caps.supported_loader_file_formats = ["typed-jsonl", "parquet"] + caps.supports_ddl_transactions = False + caps.supports_transactions = False + caps.naming_convention = "direct" + caps.max_table_nesting = 0 + return caps @property def spec(self) -> t.Type[CustomDestinationClientConfiguration]: @@ -66,7 +67,7 @@ def __init__( **kwargs: t.Any, ) -> None: if spec and not issubclass(spec, CustomDestinationClientConfiguration): - raise ValueError( + raise TerminalValueError( "A SPEC for a sink destination must use CustomDestinationClientConfiguration as a" " base." ) @@ -95,14 +96,7 @@ def __init__( "No destination callable provided, providing dummy callable which will fail on" " load." 
) - - def dummy_callable(*args: t.Any, **kwargs: t.Any) -> None: - raise DestinationTransientException( - "You tried to load to a custom destination without a valid callable." - ) - - destination_callable = dummy_callable - + destination_callable = dummy_custom_destination elif not callable(destination_callable): raise ConfigurationValueError("Resolved Sink destination callable is not a callable.") @@ -136,9 +130,21 @@ def dummy_callable(*args: t.Any, **kwargs: t.Any) -> None: super().__init__( destination_name=destination_name, environment=environment, + # NOTE: `loader_file_format` is not a field in the caps so we had to hack the base class to allow this loader_file_format=loader_file_format, batch_size=batch_size, naming_convention=naming_convention, destination_callable=conf_callable, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: CustomDestinationClientConfiguration, + naming: t.Optional[NamingConvention], + ) -> DestinationCapabilitiesContext: + caps = super().adjust_capabilities(caps, config, naming) + caps.preferred_loader_file_format = config.loader_file_format + return caps diff --git a/dlt/destinations/impl/dremio/dremio.py b/dlt/destinations/impl/dremio/dremio.py index c35ef619ed..00e51b74a6 100644 --- a/dlt/destinations/impl/dremio/dremio.py +++ b/dlt/destinations/impl/dremio/dremio.py @@ -14,7 +14,6 @@ from dlt.common.storages.file_storage import FileStorage from dlt.common.utils import uniq_id from dlt.destinations.exceptions import LoadJobTerminalException -from dlt.destinations.impl.dremio import capabilities from dlt.destinations.impl.dremio.configuration import DremioClientConfiguration from dlt.destinations.impl.dremio.sql_client import DremioSqlClient from dlt.destinations.job_client_impl import SqlJobClientWithStaging diff --git a/dlt/destinations/impl/dremio/factory.py b/dlt/destinations/impl/dremio/factory.py index 61895e4f90..29a4937c69 100644 --- a/dlt/destinations/impl/dremio/factory.py +++ b/dlt/destinations/impl/dremio/factory.py @@ -1,11 +1,13 @@ import typing as t +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.data_writers.escape import escape_dremio_identifier + from dlt.destinations.impl.dremio.configuration import ( DremioCredentials, DremioClientConfiguration, ) -from dlt.destinations.impl.dremio import capabilities -from dlt.common.destination import Destination, DestinationCapabilitiesContext if t.TYPE_CHECKING: from dlt.destinations.impl.dremio.dremio import DremioClient @@ -14,8 +16,31 @@ class dremio(Destination[DremioClientConfiguration, "DremioClient"]): spec = DremioClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = None + caps.supported_loader_file_formats = [] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["jsonl", "parquet"] + caps.escape_identifier = escape_dremio_identifier + # all identifiers are case insensitive but are stored as is + # https://docs.dremio.com/current/sonar/data-sources + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 255 + 
caps.max_column_identifier_length = 255 + caps.max_query_length = 2 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 16 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_transactions = False + caps.supports_ddl_transactions = False + caps.alter_add_multi_column = True + caps.supports_clone_table = False + caps.supports_multiple_statements = False + caps.timestamp_precision = 3 + return caps @property def client_class(self) -> t.Type["DremioClient"]: diff --git a/dlt/destinations/impl/dremio/sql_client.py b/dlt/destinations/impl/dremio/sql_client.py index 1f17045c0b..fac65e7fd0 100644 --- a/dlt/destinations/impl/dremio/sql_client.py +++ b/dlt/destinations/impl/dremio/sql_client.py @@ -10,7 +10,7 @@ DatabaseUndefinedRelation, DatabaseTransientException, ) -from dlt.destinations.impl.dremio import capabilities, pydremio +from dlt.destinations.impl.dremio import pydremio from dlt.destinations.impl.dremio.configuration import DremioCredentials from dlt.destinations.sql_client import ( DBApiCursorImpl, diff --git a/dlt/destinations/impl/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py index 8523735a09..e69de29bb2 100644 --- a/dlt/destinations/impl/duckdb/__init__.py +++ b/dlt/destinations/impl/duckdb/__init__.py @@ -1,28 +0,0 @@ -from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values", "parquet", "jsonl"] - caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = [] - caps.escape_identifier = escape_postgres_identifier - # all identifiers are case insensitive but are stored as is - caps.escape_literal = escape_duckdb_literal - caps.has_case_sensitive_identifiers = False - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 65536 - caps.max_column_identifier_length = 65536 - caps.max_query_length = 32 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 1024 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = True - caps.alter_add_multi_column = False - caps.supports_truncate_command = False - - return caps diff --git a/dlt/destinations/impl/duckdb/duck.py b/dlt/destinations/impl/duckdb/duck.py index cac21ddbc6..b87a2c4780 100644 --- a/dlt/destinations/impl/duckdb/duck.py +++ b/dlt/destinations/impl/duckdb/duck.py @@ -12,7 +12,6 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.type_mapping import TypeMapper diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index 55fcd3b339..388f914479 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -1,8 +1,10 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from 
dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE + from dlt.destinations.impl.duckdb.configuration import DuckDbCredentials, DuckDbClientConfiguration -from dlt.destinations.impl.duckdb import capabilities if t.TYPE_CHECKING: from duckdb import DuckDBPyConnection @@ -12,8 +14,29 @@ class duckdb(Destination[DuckDbClientConfiguration, "DuckDbClient"]): spec = DuckDbClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values", "parquet", "jsonl"] + caps.preferred_staging_file_format = None + caps.supported_staging_file_formats = [] + caps.escape_identifier = escape_postgres_identifier + # all identifiers are case insensitive but are stored as is + caps.escape_literal = escape_duckdb_literal + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 65536 + caps.max_column_identifier_length = 65536 + caps.max_query_length = 32 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 1024 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = True + caps.alter_add_multi_column = False + caps.supports_truncate_command = False + + return caps @property def client_class(self) -> t.Type["DuckDbClient"]: diff --git a/dlt/destinations/impl/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py index fb19f0d947..95762a1f26 100644 --- a/dlt/destinations/impl/duckdb/sql_client.py +++ b/dlt/destinations/impl/duckdb/sql_client.py @@ -17,7 +17,6 @@ raise_open_connection_error, ) -from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.configuration import DuckDbBaseCredentials diff --git a/dlt/destinations/impl/dummy/__init__.py b/dlt/destinations/impl/dummy/__init__.py index e52c083ebd..e69de29bb2 100644 --- a/dlt/destinations/impl/dummy/__init__.py +++ b/dlt/destinations/impl/dummy/__init__.py @@ -1,40 +0,0 @@ -from typing import List -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.capabilities import TLoaderFileFormat - -from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration - - -@with_config( - spec=DummyClientConfiguration, - sections=( - known_sections.DESTINATION, - "dummy", - ), -) -def _configure(config: DummyClientConfiguration = config.value) -> DummyClientConfiguration: - return config - - -def capabilities() -> DestinationCapabilitiesContext: - config = _configure() - additional_formats: List[TLoaderFileFormat] = ( - ["reference"] if config.create_followup_jobs else [] # type:ignore[list-item] - ) - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = config.loader_file_format - caps.supported_loader_file_formats = additional_formats + [config.loader_file_format] - caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = additional_formats + [config.loader_file_format] - caps.has_case_sensitive_identifiers = 
True - caps.max_identifier_length = 127 - caps.max_column_identifier_length = 127 - caps.max_query_length = 8 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 65536 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = False - - return caps diff --git a/dlt/destinations/impl/dummy/dummy.py b/dlt/destinations/impl/dummy/dummy.py index d0e2aa2885..965a558a5d 100644 --- a/dlt/destinations/impl/dummy/dummy.py +++ b/dlt/destinations/impl/dummy/dummy.py @@ -36,7 +36,6 @@ LoadJobNotExistsException, LoadJobInvalidStateTransitionException, ) -from dlt.destinations.impl.dummy import capabilities from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration from dlt.destinations.job_impl import NewReferenceJob diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py index 1c848cf22d..c68bc36ca9 100644 --- a/dlt/destinations/impl/dummy/factory.py +++ b/dlt/destinations/impl/dummy/factory.py @@ -2,11 +2,12 @@ from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.destination.capabilities import TLoaderFileFormat +from dlt.common.normalizers.naming.naming import NamingConvention from dlt.destinations.impl.dummy.configuration import ( DummyClientConfiguration, DummyClientCredentials, ) -from dlt.destinations.impl.dummy import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.dummy.dummy import DummyClient @@ -15,8 +16,19 @@ class dummy(Destination[DummyClientConfiguration, "DummyClient"]): spec = DummyClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_staging_file_format = None + caps.has_case_sensitive_identifiers = True + caps.max_identifier_length = 127 + caps.max_column_identifier_length = 127 + caps.max_query_length = 8 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 65536 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = False + + return caps @property def client_class(self) -> t.Type["DummyClient"]: @@ -37,3 +49,19 @@ def __init__( environment=environment, **kwargs, ) + + @classmethod + def adjust_capabilities( + cls, + caps: DestinationCapabilitiesContext, + config: DummyClientConfiguration, + naming: t.Optional[NamingConvention], + ) -> DestinationCapabilitiesContext: + caps = super().adjust_capabilities(caps, config, naming) + additional_formats: t.List[TLoaderFileFormat] = ( + ["reference"] if config.create_followup_jobs else [] # type:ignore[list-item] + ) + caps.preferred_loader_file_format = config.loader_file_format + caps.supported_loader_file_formats = additional_formats + [config.loader_file_format] + caps.supported_staging_file_formats = additional_formats + [config.loader_file_format] + return caps diff --git a/dlt/destinations/impl/filesystem/__init__.py b/dlt/destinations/impl/filesystem/__init__.py index 49fabd61d7..e69de29bb2 100644 --- a/dlt/destinations/impl/filesystem/__init__.py +++ b/dlt/destinations/impl/filesystem/__init__.py @@ -1,24 +0,0 @@ -from typing import Sequence, Tuple - -from dlt.common.schema.typing import TTableSchema -from dlt.common.destination import DestinationCapabilitiesContext, TLoaderFileFormat - - -def loader_file_format_adapter( - preferred_loader_file_format: TLoaderFileFormat, - supported_loader_file_formats: 
Sequence[TLoaderFileFormat], - /, - *, - table_schema: TTableSchema, -) -> Tuple[TLoaderFileFormat, Sequence[TLoaderFileFormat]]: - if table_schema.get("table_format") == "delta": - return ("parquet", ["parquet"]) - return (preferred_loader_file_format, supported_loader_file_formats) - - -def capabilities() -> DestinationCapabilitiesContext: - return DestinationCapabilitiesContext.generic_capabilities( - preferred_loader_file_format="jsonl", - loader_file_format_adapter=loader_file_format_adapter, - supported_table_formats=["delta"], - ) diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 029a5bdda5..111c7e1ad6 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -1,19 +1,36 @@ import typing as t -from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration -from dlt.destinations.impl.filesystem import capabilities -from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.destination import Destination, DestinationCapabilitiesContext, TLoaderFileFormat +from dlt.common.schema.typing import TTableSchema from dlt.common.storages.configuration import FileSystemCredentials +from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration + if t.TYPE_CHECKING: from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +def loader_file_format_adapter( + preferred_loader_file_format: TLoaderFileFormat, + supported_loader_file_formats: t.Sequence[TLoaderFileFormat], + /, + *, + table_schema: TTableSchema, +) -> t.Tuple[TLoaderFileFormat, t.Sequence[TLoaderFileFormat]]: + if table_schema.get("table_format") == "delta": + return ("parquet", ["parquet"]) + return (preferred_loader_file_format, supported_loader_file_formats) + + class filesystem(Destination[FilesystemDestinationClientConfiguration, "FilesystemClient"]): spec = FilesystemDestinationClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + return DestinationCapabilitiesContext.generic_capabilities( + preferred_loader_file_format="jsonl", + loader_file_format_adapter=loader_file_format_adapter, + supported_table_formats=["delta"], + ) @property def client_class(self) -> t.Type["FilesystemClient"]: diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 9f2af7ab30..4cffebd7ce 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -29,7 +29,6 @@ ) from dlt.common.destination.exceptions import DestinationUndefinedEntity from dlt.destinations.job_impl import EmptyLoadJob, NewReferenceJob -from dlt.destinations.impl.filesystem import capabilities from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations import path_utils diff --git a/dlt/destinations/impl/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py index 849c56d10b..e69de29bb2 100644 --- a/dlt/destinations/impl/motherduck/__init__.py +++ b/dlt/destinations/impl/motherduck/__init__.py @@ -1,26 +0,0 @@ -from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics 
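# Illustrative usage, not part of this patch, of loader_file_format_adapter as
# moved into dlt/destinations/impl/filesystem/factory.py by this change: tables
# using the "delta" table format are forced to parquet, everything else keeps the
# defaults. The table schema dicts below are made-up examples.
from dlt.destinations.impl.filesystem.factory import loader_file_format_adapter

fmt, supported = loader_file_format_adapter(
    "jsonl", ["jsonl", "parquet"], table_schema={"name": "events", "table_format": "delta"}
)
assert (fmt, list(supported)) == ("parquet", ["parquet"])

fmt, supported = loader_file_format_adapter(
    "jsonl", ["jsonl", "parquet"], table_schema={"name": "events"}
)
assert fmt == "jsonl"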
import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "parquet" - caps.supported_loader_file_formats = ["parquet", "insert_values", "jsonl"] - caps.escape_identifier = escape_postgres_identifier - # all identifiers are case insensitive but are stored as is - caps.escape_literal = escape_duckdb_literal - caps.has_case_sensitive_identifiers = False - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 65536 - caps.max_column_identifier_length = 65536 - caps.max_query_length = 512 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 1024 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = False - caps.alter_add_multi_column = False - caps.supports_truncate_command = False - - return caps diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py index 5e35f69d75..df7418b9db 100644 --- a/dlt/destinations/impl/motherduck/factory.py +++ b/dlt/destinations/impl/motherduck/factory.py @@ -1,11 +1,13 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE + from dlt.destinations.impl.motherduck.configuration import ( MotherDuckCredentials, MotherDuckClientConfiguration, ) -from dlt.destinations.impl.motherduck import capabilities if t.TYPE_CHECKING: from duckdb import DuckDBPyConnection @@ -15,8 +17,27 @@ class motherduck(Destination[MotherDuckClientConfiguration, "MotherDuckClient"]): spec = MotherDuckClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "parquet" + caps.supported_loader_file_formats = ["parquet", "insert_values", "jsonl"] + caps.escape_identifier = escape_postgres_identifier + # all identifiers are case insensitive but are stored as is + caps.escape_literal = escape_duckdb_literal + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 65536 + caps.max_column_identifier_length = 65536 + caps.max_query_length = 512 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 1024 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = False + caps.alter_add_multi_column = False + caps.supports_truncate_command = False + + return caps @property def client_class(self) -> t.Type["MotherDuckClient"]: diff --git a/dlt/destinations/impl/motherduck/motherduck.py b/dlt/destinations/impl/motherduck/motherduck.py index 8e361e4ac1..3a5f172864 100644 --- a/dlt/destinations/impl/motherduck/motherduck.py +++ b/dlt/destinations/impl/motherduck/motherduck.py @@ -1,5 +1,3 @@ -from typing import ClassVar - from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.schema import Schema diff --git a/dlt/destinations/impl/mssql/__init__.py b/dlt/destinations/impl/mssql/__init__.py 
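# Illustrative sketch, not part of this patch: with capabilities now defined on the
# factory classes, the raw, config-independent defaults can be read straight off a
# factory instance instead of importing dlt.destinations.impl.<name>. Assuming the
# dlt.destinations.motherduck factory is importable and constructible as usual:
from dlt.destinations import motherduck

caps = motherduck()._raw_capabilities()
assert caps.preferred_loader_file_format == "parquet"
assert caps.max_query_length == 512 * 1024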
index 8b874af2f6..e69de29bb2 100644 --- a/dlt/destinations/impl/mssql/__init__.py +++ b/dlt/destinations/impl/mssql/__init__.py @@ -1,33 +0,0 @@ -from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.common.wei import EVM_DECIMAL_PRECISION - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values"] - caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = [] - # mssql is by default case insensitive and stores identifiers as is - # case sensitivity can be changed by database collation so we allow to reconfigure - # capabilities in the mssql factory - caps.escape_identifier = escape_postgres_identifier - caps.escape_literal = escape_mssql_literal - caps.has_case_sensitive_identifiers = False - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - # https://learn.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server?view=sql-server-ver16&redirectedfrom=MSDN - caps.max_identifier_length = 128 - caps.max_column_identifier_length = 128 - # A SQL Query can be a varchar(max) but is shown as limited to 65,536 * Network Packet - caps.max_query_length = 65536 * 10 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 2**30 - 1 - caps.is_max_text_data_type_length_in_bytes = False - caps.supports_ddl_transactions = True - caps.max_rows_per_insert = 1000 - caps.timestamp_precision = 7 - - return caps diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index d935a10176..6912510995 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -1,10 +1,11 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext - from dlt.common.normalizers.naming.naming import NamingConvention +from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE + from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration -from dlt.destinations.impl.mssql import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.mssql.mssql import MsSqlJobClient @@ -13,8 +14,33 @@ class mssql(Destination[MsSqlClientConfiguration, "MsSqlJobClient"]): spec = MsSqlClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values"] + caps.preferred_staging_file_format = None + caps.supported_staging_file_formats = [] + # mssql is by default case insensitive and stores identifiers as is + # case sensitivity can be changed by database collation so we allow to reconfigure + # capabilities in the mssql factory + caps.escape_identifier = escape_postgres_identifier + caps.escape_literal = escape_mssql_literal + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, 
DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + # https://learn.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server?view=sql-server-ver16&redirectedfrom=MSDN + caps.max_identifier_length = 128 + caps.max_column_identifier_length = 128 + # A SQL Query can be a varchar(max) but is shown as limited to 65,536 * Network Packet + caps.max_query_length = 65536 * 10 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 2**30 - 1 + caps.is_max_text_data_type_length_in_bytes = False + caps.supports_ddl_transactions = True + caps.max_rows_per_insert = 1000 + caps.timestamp_precision = 7 + + return caps @property def client_class(self) -> t.Type["MsSqlJobClient"]: @@ -56,7 +82,7 @@ def adjust_capabilities( cls, caps: DestinationCapabilitiesContext, config: MsSqlClientConfiguration, - naming: NamingConvention, + naming: t.Optional[NamingConvention], ) -> DestinationCapabilitiesContext: # modify the caps if case sensitive identifiers are requested if config.has_case_sensitive_identifiers: diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index 555a3193a7..25aab5c52a 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -1,19 +1,15 @@ -from typing import ClassVar, Dict, Optional, Sequence, List, Any, Tuple +from typing import Dict, Optional, Sequence, List, Any from dlt.common.exceptions import TerminalValueError -from dlt.common.wei import EVM_DECIMAL_PRECISION from dlt.common.destination.reference import NewLoadJob from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.data_types import TDataType from dlt.common.schema import TColumnSchema, TColumnHint, Schema from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat -from dlt.common.utils import uniq_id from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlMergeJob, SqlJobParams from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.impl.mssql import capabilities from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration from dlt.destinations.sql_client import SqlClientBase diff --git a/dlt/destinations/impl/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py index 1c6f82cdbf..a360670e77 100644 --- a/dlt/destinations/impl/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -1,4 +1,3 @@ -import platform import struct from datetime import datetime, timedelta, timezone # noqa: I251 @@ -23,7 +22,6 @@ ) from dlt.destinations.impl.mssql.configuration import MsSqlCredentials -from dlt.destinations.impl.mssql import capabilities def handle_datetimeoffset(dto_value: bytes) -> datetime: diff --git a/dlt/destinations/impl/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py index 76ee2b5cff..e69de29bb2 100644 --- a/dlt/destinations/impl/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -1,32 +0,0 @@ -from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.common.wei import EVM_DECIMAL_PRECISION - - -def capabilities() -> DestinationCapabilitiesContext: - # https://www.postgresql.org/docs/current/limits.html - caps = DestinationCapabilitiesContext() - 
caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values", "csv"] - caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = [] - caps.escape_identifier = escape_postgres_identifier - # postgres has case sensitive identifiers but by default - # it folds them to lower case which makes them case insensitive - # https://stackoverflow.com/questions/20878932/are-postgresql-column-names-case-sensitive - caps.casefold_identifier = str.lower - caps.has_case_sensitive_identifiers = True - caps.escape_literal = escape_postgres_literal - caps.has_case_sensitive_identifiers = True - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (2 * EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) - caps.max_identifier_length = 63 - caps.max_column_identifier_length = 63 - caps.max_query_length = 32 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 1024 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = True - - return caps diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index 68d72f890a..7260256be9 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -1,12 +1,14 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.wei import EVM_DECIMAL_PRECISION from dlt.destinations.impl.postgres.configuration import ( PostgresCredentials, PostgresClientConfiguration, ) -from dlt.destinations.impl.postgres import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.postgres.postgres import PostgresClient @@ -15,8 +17,32 @@ class postgres(Destination[PostgresClientConfiguration, "PostgresClient"]): spec = PostgresClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + # https://www.postgresql.org/docs/current/limits.html + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values", "csv"] + caps.preferred_staging_file_format = None + caps.supported_staging_file_formats = [] + caps.escape_identifier = escape_postgres_identifier + # postgres has case sensitive identifiers but by default + # it folds them to lower case which makes them case insensitive + # https://stackoverflow.com/questions/20878932/are-postgresql-column-names-case-sensitive + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = True + caps.escape_literal = escape_postgres_literal + caps.has_case_sensitive_identifiers = True + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (2 * EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) + caps.max_identifier_length = 63 + caps.max_column_identifier_length = 63 + caps.max_query_length = 32 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 1024 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = True + + return caps @property def client_class(self) -> t.Type["PostgresClient"]: diff --git 
a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index 089365bbef..3a90048230 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -9,7 +9,6 @@ from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.impl.postgres import capabilities from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration from dlt.destinations.sql_client import SqlClientBase diff --git a/dlt/destinations/impl/postgres/sql_client.py b/dlt/destinations/impl/postgres/sql_client.py index 8bd1a9cfa5..38bfc212d5 100644 --- a/dlt/destinations/impl/postgres/sql_client.py +++ b/dlt/destinations/impl/postgres/sql_client.py @@ -26,7 +26,6 @@ ) from dlt.destinations.impl.postgres.configuration import PostgresCredentials -from dlt.destinations.impl.postgres import capabilities class Psycopg2SqlClient(SqlClientBase["psycopg2.connection"], DBTransaction): diff --git a/dlt/destinations/impl/qdrant/__init__.py b/dlt/destinations/impl/qdrant/__init__.py index 331d1725ef..e69de29bb2 100644 --- a/dlt/destinations/impl/qdrant/__init__.py +++ b/dlt/destinations/impl/qdrant/__init__.py @@ -1,18 +0,0 @@ -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl"] - caps.has_case_sensitive_identifiers = True - caps.max_identifier_length = 200 - caps.max_column_identifier_length = 1024 - caps.max_query_length = 8 * 1024 * 1024 - caps.is_max_query_length_in_bytes = False - caps.max_text_data_type_length = 8 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = False - caps.supports_ddl_transactions = False - - return caps diff --git a/dlt/destinations/impl/qdrant/factory.py b/dlt/destinations/impl/qdrant/factory.py index df9cd64871..defd29a03a 100644 --- a/dlt/destinations/impl/qdrant/factory.py +++ b/dlt/destinations/impl/qdrant/factory.py @@ -3,7 +3,6 @@ from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.destinations.impl.qdrant.configuration import QdrantCredentials, QdrantClientConfiguration -from dlt.destinations.impl.qdrant import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient @@ -12,8 +11,20 @@ class qdrant(Destination[QdrantClientConfiguration, "QdrantClient"]): spec = QdrantClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl"] + caps.has_case_sensitive_identifiers = True + caps.max_identifier_length = 200 + caps.max_column_identifier_length = 1024 + caps.max_query_length = 8 * 1024 * 1024 + caps.is_max_query_length_in_bytes = False + caps.max_text_data_type_length = 8 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = False + caps.supports_ddl_transactions = False + + return caps @property def client_class(self) -> t.Type["QdrantClient"]: diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py 
b/dlt/destinations/impl/qdrant/qdrant_client.py index 03233f7792..554602d594 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -1,5 +1,5 @@ from types import TracebackType -from typing import ClassVar, Optional, Sequence, List, Dict, Type, Iterable, Any, IO +from typing import Optional, Sequence, List, Dict, Type, Iterable, Any from dlt.common import logger from dlt.common.json import json diff --git a/dlt/destinations/impl/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py index f79ce7fb66..e69de29bb2 100644 --- a/dlt/destinations/impl/redshift/__init__.py +++ b/dlt/destinations/impl/redshift/__init__.py @@ -1,30 +0,0 @@ -from dlt.common.data_writers.escape import escape_redshift_identifier, escape_redshift_literal -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values"] - caps.preferred_staging_file_format = "jsonl" - caps.supported_staging_file_formats = ["jsonl", "parquet"] - # redshift is case insensitive and will lower case identifiers when stored - # you can enable case sensitivity https://docs.aws.amazon.com/redshift/latest/dg/r_enable_case_sensitive_identifier.html - # then redshift behaves like postgres - caps.escape_identifier = escape_redshift_identifier - caps.escape_literal = escape_redshift_literal - caps.casefold_identifier = str.lower - caps.has_case_sensitive_identifiers = False - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 127 - caps.max_column_identifier_length = 127 - caps.max_query_length = 16 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 65535 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = True - caps.alter_add_multi_column = False - - return caps diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py index ef9c3d0ad3..7e6638be1e 100644 --- a/dlt/destinations/impl/redshift/factory.py +++ b/dlt/destinations/impl/redshift/factory.py @@ -1,13 +1,14 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.data_writers.escape import escape_redshift_identifier, escape_redshift_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.normalizers.naming import NamingConvention from dlt.destinations.impl.redshift.configuration import ( RedshiftCredentials, RedshiftClientConfiguration, ) -from dlt.destinations.impl.redshift import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.redshift.redshift import RedshiftClient @@ -16,8 +17,31 @@ class redshift(Destination[RedshiftClientConfiguration, "RedshiftClient"]): spec = RedshiftClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values"] + caps.preferred_staging_file_format = "jsonl" + caps.supported_staging_file_formats = 
["jsonl", "parquet"] + # redshift is case insensitive and will lower case identifiers when stored + # you can enable case sensitivity https://docs.aws.amazon.com/redshift/latest/dg/r_enable_case_sensitive_identifier.html + # then redshift behaves like postgres + caps.escape_identifier = escape_redshift_identifier + caps.escape_literal = escape_redshift_literal + caps.casefold_identifier = str.lower + caps.has_case_sensitive_identifiers = False + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 127 + caps.max_column_identifier_length = 127 + caps.max_query_length = 16 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 65535 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = True + caps.alter_add_multi_column = False + + return caps @property def client_class(self) -> t.Type["RedshiftClient"]: @@ -59,7 +83,7 @@ def adjust_capabilities( cls, caps: DestinationCapabilitiesContext, config: RedshiftClientConfiguration, - naming: NamingConvention, + naming: t.Optional[NamingConvention], ) -> DestinationCapabilitiesContext: # modify the caps if case sensitive identifiers are requested if config.has_case_sensitive_identifiers: diff --git a/dlt/destinations/impl/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py index f5c1bec314..e69de29bb2 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -1,30 +0,0 @@ -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.data_writers.escape import escape_snowflake_identifier -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl", "parquet"] - caps.preferred_staging_file_format = "jsonl" - caps.supported_staging_file_formats = ["jsonl", "parquet"] - # snowflake is case sensitive but all unquoted identifiers are upper cased - # so upper case identifiers are considered case insensitive - caps.escape_identifier = escape_snowflake_identifier - # dlt is configured to create case insensitive identifiers - # note that case sensitive naming conventions will change this setting to "str" (case sensitive) - caps.casefold_identifier = str.upper - caps.has_case_sensitive_identifiers = True - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - caps.max_identifier_length = 255 - caps.max_column_identifier_length = 255 - caps.max_query_length = 2 * 1024 * 1024 - caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 16 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - caps.supports_ddl_transactions = True - caps.alter_add_multi_column = True - caps.supports_clone_table = True - return caps diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index c4459232b7..73f6175624 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -1,11 +1,13 @@ import typing as t +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.data_writers.escape import escape_snowflake_identifier +from dlt.common.arithmetics import 
DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE + from dlt.destinations.impl.snowflake.configuration import ( SnowflakeCredentials, SnowflakeClientConfiguration, ) -from dlt.destinations.impl.snowflake import capabilities -from dlt.common.destination import Destination, DestinationCapabilitiesContext if t.TYPE_CHECKING: from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient @@ -14,8 +16,31 @@ class snowflake(Destination[SnowflakeClientConfiguration, "SnowflakeClient"]): spec = SnowflakeClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl", "parquet"] + caps.preferred_staging_file_format = "jsonl" + caps.supported_staging_file_formats = ["jsonl", "parquet"] + # snowflake is case sensitive but all unquoted identifiers are upper cased + # so upper case identifiers are considered case insensitive + caps.escape_identifier = escape_snowflake_identifier + # dlt is configured to create case insensitive identifiers + # note that case sensitive naming conventions will change this setting to "str" (case sensitive) + caps.casefold_identifier = str.upper + caps.has_case_sensitive_identifiers = True + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + caps.max_identifier_length = 255 + caps.max_column_identifier_length = 255 + caps.max_query_length = 2 * 1024 * 1024 + caps.is_max_query_length_in_bytes = True + caps.max_text_data_type_length = 16 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + caps.supports_ddl_transactions = True + caps.alter_add_multi_column = True + caps.supports_clone_table = True + return caps @property def client_class(self) -> t.Type["SnowflakeClient"]: diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index d8650d33c5..f30c16643d 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -24,7 +24,6 @@ from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.exceptions import LoadJobTerminalException -from dlt.destinations.impl.snowflake import capabilities from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient from dlt.destinations.sql_jobs import SqlJobParams diff --git a/dlt/destinations/impl/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py index 23b9f65052..e033a9f455 100644 --- a/dlt/destinations/impl/snowflake/sql_client.py +++ b/dlt/destinations/impl/snowflake/sql_client.py @@ -17,7 +17,6 @@ ) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials -from dlt.destinations.impl.snowflake import capabilities class SnowflakeCursorImpl(DBApiCursorImpl): diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py index 6c695e7089..e69de29bb2 100644 --- a/dlt/destinations/impl/synapse/__init__.py +++ b/dlt/destinations/impl/synapse/__init__.py @@ -1,59 +0,0 @@ -from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal -from dlt.common.destination import DestinationCapabilitiesContext -from 
dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.common.wei import EVM_DECIMAL_PRECISION - -from dlt.destinations.impl.synapse.synapse_adapter import synapse_adapter - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - - caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values"] - caps.preferred_staging_file_format = "parquet" - caps.supported_staging_file_formats = ["parquet"] - - caps.insert_values_writer_type = "select_union" # https://stackoverflow.com/a/77014299 - - # similarly to mssql case sensitivity depends on database collation - # https://learn.microsoft.com/en-us/sql/relational-databases/collations/collation-and-unicode-support?view=sql-server-ver16#collations-in-azure-sql-database - # note that special option CATALOG_COLLATION is used to change it - caps.escape_identifier = escape_postgres_identifier - caps.escape_literal = escape_mssql_literal - # we allow to reconfigure capabilities in the mssql factory - caps.has_case_sensitive_identifiers = False - - # Synapse has a max precision of 38 - # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#DataTypes - caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) - - # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#LimitationsRestrictions - caps.max_identifier_length = 128 - caps.max_column_identifier_length = 128 - - # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-service-capacity-limits#queries - caps.max_query_length = 65536 * 4096 - caps.is_max_query_length_in_bytes = True - - # nvarchar(max) can store 2 GB - # https://learn.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver16#nvarchar---n--max-- - caps.max_text_data_type_length = 2 * 1024 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = True - - # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-develop-transactions - caps.supports_transactions = True - caps.supports_ddl_transactions = False - - # Synapse throws "Some part of your SQL statement is nested too deeply. Rewrite the query or break it up into smaller queries." - # if number of records exceeds a certain number. Which exact number that is seems not deterministic: - # in tests, I've seen a query with 12230 records run succesfully on one run, but fail on a subsequent run, while the query remained exactly the same. - # 10.000 records is a "safe" amount that always seems to work. 
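Editor's note: the `insert_values_writer_type = "select_union"` setting and the 10,000-row cap described in the comment above are related: with `select_union` every row becomes its own `SELECT ... UNION ALL` branch, and Synapse rejects statements that nest too deeply. The snippet below is only an illustrative sketch of the two statement shapes this setting chooses between (table, columns, and data are made up; it is not dlt's actual writer code):

```py
# Illustrative sketch: how the two insert_values_writer_type settings shape an INSERT.
rows = [(1, "a"), (2, "b"), (3, "c")]

# "default" writer: one VALUES clause containing all rows
values_sql = "INSERT INTO demo (id, name) VALUES " + ", ".join(
    f"({i}, '{n}')" for i, n in rows
)

# "select_union" writer (used for Synapse, see https://stackoverflow.com/a/77014299):
# one SELECT per row chained with UNION ALL; very long chains trigger the
# "nested too deeply" error, which is why max_rows_per_insert caps a batch at 10000
union_sql = "INSERT INTO demo (id, name) " + " UNION ALL ".join(
    f"SELECT {i}, '{n}'" for i, n in rows
)

print(values_sql)
print(union_sql)
```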
- caps.max_rows_per_insert = 10000 - - # datetimeoffset can store 7 digits for fractional seconds - # https://learn.microsoft.com/en-us/sql/t-sql/data-types/datetimeoffset-transact-sql?view=sql-server-ver16 - caps.timestamp_precision = 7 - - return caps diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index 41fb248056..4820056e66 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -2,8 +2,9 @@ from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.normalizers.naming import NamingConvention +from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.configuration import ( SynapseCredentials, SynapseClientConfiguration, @@ -22,8 +23,57 @@ class synapse(Destination[SynapseClientConfiguration, "SynapseClient"]): # def spec(self) -> t.Type[SynapseClientConfiguration]: # return SynapseClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values"] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["parquet"] + + caps.insert_values_writer_type = "select_union" # https://stackoverflow.com/a/77014299 + + # similarly to mssql case sensitivity depends on database collation + # https://learn.microsoft.com/en-us/sql/relational-databases/collations/collation-and-unicode-support?view=sql-server-ver16#collations-in-azure-sql-database + # note that special option CATALOG_COLLATION is used to change it + caps.escape_identifier = escape_postgres_identifier + caps.escape_literal = escape_mssql_literal + # we allow to reconfigure capabilities in the mssql factory + caps.has_case_sensitive_identifiers = False + + # Synapse has a max precision of 38 + # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#DataTypes + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + + # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#LimitationsRestrictions + caps.max_identifier_length = 128 + caps.max_column_identifier_length = 128 + + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-service-capacity-limits#queries + caps.max_query_length = 65536 * 4096 + caps.is_max_query_length_in_bytes = True + + # nvarchar(max) can store 2 GB + # https://learn.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver16#nvarchar---n--max-- + caps.max_text_data_type_length = 2 * 1024 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-develop-transactions + caps.supports_transactions = True + caps.supports_ddl_transactions = False + + # Synapse throws "Some part of your SQL statement is nested too deeply. Rewrite the query or break it up into smaller queries." 
+ # if number of records exceeds a certain number. Which exact number that is seems not deterministic: + # in tests, I've seen a query with 12230 records run succesfully on one run, but fail on a subsequent run, while the query remained exactly the same. + # 10.000 records is a "safe" amount that always seems to work. + caps.max_rows_per_insert = 10000 + + # datetimeoffset can store 7 digits for fractional seconds + # https://learn.microsoft.com/en-us/sql/t-sql/data-types/datetimeoffset-transact-sql?view=sql-server-ver16 + caps.timestamp_precision = 7 + + return caps @property def client_class(self) -> t.Type["SynapseClient"]: @@ -71,7 +121,7 @@ def adjust_capabilities( cls, caps: DestinationCapabilitiesContext, config: SynapseClientConfiguration, - naming: NamingConvention, + naming: t.Optional[NamingConvention], ) -> DestinationCapabilitiesContext: # modify the caps if case sensitive identifiers are requested if config.has_case_sensitive_identifiers: diff --git a/dlt/destinations/impl/synapse/sql_client.py b/dlt/destinations/impl/synapse/sql_client.py index 05ceee0356..db1b3e7cf6 100644 --- a/dlt/destinations/impl/synapse/sql_client.py +++ b/dlt/destinations/impl/synapse/sql_client.py @@ -5,7 +5,6 @@ from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient from dlt.destinations.impl.mssql.configuration import MsSqlCredentials -from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.configuration import SynapseCredentials from dlt.destinations.exceptions import DatabaseUndefinedRelation diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py index eaf4ca56f3..e69de29bb2 100644 --- a/dlt/destinations/impl/weaviate/__init__.py +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -1,24 +0,0 @@ -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.impl.weaviate.weaviate_adapter import weaviate_adapter - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl"] - # weaviate names are case sensitive following GraphQL naming convention - # https://weaviate.io/developers/weaviate/config-refs/schema - caps.has_case_sensitive_identifiers = False - # weaviate will upper case first letter of class name and lower case first letter of a property - # we assume that naming convention will do that - caps.casefold_identifier = str - caps.max_identifier_length = 200 - caps.max_column_identifier_length = 1024 - caps.max_query_length = 8 * 1024 * 1024 - caps.is_max_query_length_in_bytes = False - caps.max_text_data_type_length = 8 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = False - caps.supports_ddl_transactions = False - caps.naming_convention = "dlt.destinations.impl.weaviate.naming" - - return caps diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py index 0449e6cdd5..3d78c9582a 100644 --- a/dlt/destinations/impl/weaviate/factory.py +++ b/dlt/destinations/impl/weaviate/factory.py @@ -6,7 +6,6 @@ WeaviateCredentials, WeaviateClientConfiguration, ) -from dlt.destinations.impl.weaviate import capabilities if t.TYPE_CHECKING: from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient @@ -15,8 +14,26 @@ class weaviate(Destination[WeaviateClientConfiguration, "WeaviateClient"]): spec = WeaviateClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: - 
return capabilities() + def _raw_capabilities(self) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl"] + # weaviate names are case sensitive following GraphQL naming convention + # https://weaviate.io/developers/weaviate/config-refs/schema + caps.has_case_sensitive_identifiers = False + # weaviate will upper case first letter of class name and lower case first letter of a property + # we assume that naming convention will do that + caps.casefold_identifier = str + caps.max_identifier_length = 200 + caps.max_column_identifier_length = 1024 + caps.max_query_length = 8 * 1024 * 1024 + caps.is_max_query_length_in_bytes = False + caps.max_text_data_type_length = 8 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = False + caps.supports_ddl_transactions = False + caps.naming_convention = "dlt.destinations.impl.weaviate.naming" + + return caps @property def client_class(self) -> t.Type["WeaviateClient"]: diff --git a/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py b/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py index 809a6cfbd6..5fbba98a21 100644 --- a/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py +++ b/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py @@ -25,7 +25,7 @@ import os import dlt -from dlt.destinations.impl.weaviate import weaviate_adapter +from dlt.destinations.adapters import weaviate_adapter from PyPDF2 import PdfReader diff --git a/docs/website/docs/dlt-ecosystem/destinations/athena.md b/docs/website/docs/dlt-ecosystem/destinations/athena.md index 93291bfe9a..a723e3554c 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/athena.md +++ b/docs/website/docs/dlt-ecosystem/destinations/athena.md @@ -141,7 +141,7 @@ For every table created as an iceberg table, the Athena destination will create The `merge` write disposition is supported for Athena when using iceberg tables. > Note that: -> 1. there is a risk of tables ending up in inconsistent state in case a pipeline run fails mid flight, because Athena doesn't support transactions, and `dlt` uses multiple DELETE/UPDATE/INSERT statements to implement `merge`, +> 1. there is a risk of tables ending up in inconsistent state in case a pipeline run fails mid flight, because Athena doesn't support transactions, and `dlt` uses multiple DELETE/UPDATE/INSERT statements to implement `merge`, > 2. `dlt` creates additional helper tables called `insert_` and `delete_
` in the staging schema to work around Athena's lack of temporary tables. ### dbt support @@ -183,7 +183,7 @@ Here is an example of how to use the adapter to partition a table: from datetime import date import dlt -from dlt.destinations.impl.athena.athena_adapter import athena_partition, athena_adapter +from dlt.destinations.adapters import athena_partition, athena_adapter data_items = [ (1, "A", date(2021, 1, 1)), diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md index 4f99901e37..f97a4a96bb 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md +++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md @@ -232,7 +232,7 @@ Here is an example of how to use the `bigquery_adapter` method to apply hints to from datetime import date, timedelta import dlt -from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter +from dlt.destinations.adapters import bigquery_adapter @dlt.resource( diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md index 2e936f193e..6cfcb1ef8f 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/synapse.md +++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md @@ -148,6 +148,8 @@ Data is loaded via `INSERT` statements by default. The [table index type](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index) of the created tables can be configured at the resource level with the `synapse_adapter`: ```py +from dlt.destinations.adapters import synapse_adapter + info = pipeline.run( synapse_adapter( data=your_resource, diff --git a/docs/website/docs/reference/performance_snippets/toml-snippets.toml b/docs/website/docs/reference/performance_snippets/toml-snippets.toml index 5e700c4e31..e1a640e7cf 100644 --- a/docs/website/docs/reference/performance_snippets/toml-snippets.toml +++ b/docs/website/docs/reference/performance_snippets/toml-snippets.toml @@ -71,7 +71,7 @@ max_parallel_items=10 # @@@DLT_SNIPPET_START normalize_workers_toml - [extract.data_writer] +[extract.data_writer] # force extract file rotation if size exceeds 1MiB file_max_bytes=1000000 From bbd7fe63ceecc58c0d125098768adedee2997ca5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 22:43:36 +0200 Subject: [PATCH 064/105] sentry_dsn --- dlt/pipeline/pipeline.py | 65 ++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 48f37f1be3..72db857aa8 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -1,6 +1,5 @@ import contextlib import os -import datetime # noqa: 251 from contextlib import contextmanager from functools import wraps from typing import ( @@ -38,7 +37,6 @@ DestinationUndefinedEntity, ) from dlt.common.exceptions import MissingDependencyException -from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers from dlt.common.runtime import signals, initialize_runtime from dlt.common.schema.typing import ( TColumnNames, @@ -84,12 +82,12 @@ DestinationClientStagingConfiguration, DestinationClientDwhWithStagingConfiguration, ) +from dlt.common.normalizers.naming import NamingConvention from dlt.common.pipeline import ( ExtractInfo, LoadInfo, NormalizeInfo, PipelineContext, - StepInfo, TStepInfo, SupportsPipeline, TPipelineLocalState, @@ -104,7 +102,7 @@ from dlt.common.warnings import deprecated, 
Dlt04DeprecationWarning from dlt.common.versioned_state import json_encode_state, json_decode_state -from dlt.extract import DltSource, DltResource +from dlt.extract import DltSource from dlt.extract.exceptions import SourceExhausted from dlt.extract.extract import Extract, data_to_sources from dlt.normalize import Normalize @@ -125,7 +123,6 @@ PipelineStepFailed, SqlClientNotAvailable, FSClientNotAvailable, - PipelineNeverRan, ) from dlt.pipeline.trace import ( PipelineTrace, @@ -360,14 +357,14 @@ def __init__( self._init_working_dir(pipeline_name, pipelines_dir) with self.managed_state() as state: + self.credentials = credentials + self._configure(import_schema_path, export_schema_path, must_attach_to_local_pipeline) # changing the destination could be dangerous if pipeline has pending load packages - self._set_destinations(destination=destination, staging=staging) + self._set_destinations(destination=destination, staging=staging, initializing=True) # set the pipeline properties from state, destination and staging will not be set self._state_to_props(state) # we overwrite the state with the values from init self._set_dataset_name(dataset_name) - self.credentials = credentials - self._configure(import_schema_path, export_schema_path, must_attach_to_local_pipeline) def drop(self, pipeline_name: str = None) -> "Pipeline": """Deletes local pipeline state, schemas and any working files. @@ -866,6 +863,11 @@ def state(self) -> TPipelineState: """Returns a dictionary with the pipeline state""" return self._get_state() + @property + def naming(self) -> NamingConvention: + """Returns naming convention of the default schema""" + return self._get_schema_or_create().naming + @property def last_trace(self) -> PipelineTrace: """Returns or loads last trace generated by pipeline. 
The trace is loaded from standard location.""" @@ -1248,10 +1250,28 @@ def _get_destination_capabilities(self) -> DestinationCapabilitiesContext: "Please provide `destination` argument to `pipeline`, `run` or `load` method" " directly or via .dlt config.toml file or environment variable.", ) - return self.destination.capabilities() + # check if default schema is present + if ( + self.default_schema_name is not None + and self.default_schema_name in self._schema_storage + ): + naming = self.default_schema.naming + else: + naming = None + return self.destination.capabilities(naming=naming) def _get_staging_capabilities(self) -> Optional[DestinationCapabilitiesContext]: - return self.staging.capabilities() if self.staging is not None else None + if self.staging is None: + return None + # check if default schema is present + if ( + self.default_schema_name is not None + and self.default_schema_name in self._schema_storage + ): + naming = self.default_schema.naming + else: + naming = None + return self.staging.capabilities(naming=naming) def _validate_pipeline_name(self) -> None: try: @@ -1287,9 +1307,11 @@ def _set_destinations( destination_name: Optional[str] = None, staging: Optional[TDestinationReferenceArg] = None, staging_name: Optional[str] = None, + initializing: bool = False, ) -> None: - # destination_mod = DestinationReference.from_name(destination) - if destination: + destination_changed = destination is not None and destination != self.destination + # set destination if provided but do not swap if factory is the same + if destination_changed: self.destination = Destination.from_reference( destination, destination_name=destination_name ) @@ -1308,7 +1330,8 @@ def _set_destinations( staging = "filesystem" staging_name = "filesystem" - if staging: + staging_changed = staging is not None and staging != self.staging + if staging_changed: staging_module = Destination.from_reference(staging, destination_name=staging_name) if staging_module and not issubclass( staging_module.spec, DestinationClientStagingConfiguration @@ -1316,9 +1339,16 @@ def _set_destinations( raise DestinationNoStagingMode(staging_module.destination_name) self.staging = staging_module - with self._maybe_destination_capabilities(): - # default normalizers must match the destination - self._set_default_normalizers() + if staging_changed or destination_changed: + # make sure that capabilities can be generated + with self._maybe_destination_capabilities(): + # update normalizers in all live schemas, only when destination changed + if destination_changed and not initializing: + for schema in self._schema_storage.live_schemas.values(): + schema.update_normalizers() + # set new context + if not initializing: + self._set_context(is_active=True) @contextmanager def _maybe_destination_capabilities( @@ -1346,9 +1376,6 @@ def _maybe_destination_capabilities( if injected_caps: injected_caps.__exit__(None, None, None) - def _set_default_normalizers(self) -> None: - _, self._default_naming, _ = import_normalizers(explicit_normalizers()) - def _set_dataset_name(self, new_dataset_name: str) -> None: if not new_dataset_name and not self.dataset_name: # dataset name is required but not provided - generate the default now From a6715081ab8c2d63446cc621a332da070b8856d5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 22:45:41 +0200 Subject: [PATCH 065/105] adds basic destination reference tests --- tests/common/test_destination.py | 142 ++++++++++++++++++++++++++++--- 1 file changed, 131 insertions(+), 11 deletions(-) 
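Editor's note: the tests added below exercise the refactored destination factories. Keyword arguments passed to `Destination.from_reference` (or to a factory such as `dlt.destinations.dummy`) are split into configuration and capability parameters, and capabilities are produced by the factory itself instead of a module-level `capabilities()` function. A condensed sketch of the behavior those tests assert (it mirrors the test code and is not an API guarantee):

```py
# Condensed from the assertions in tests/common/test_destination.py below.
from dlt.common.destination.reference import Destination

dest = Destination.from_reference(
    "dlt.destinations.dummy",
    naming_convention="duck_case",    # known capability -> collected in caps_params
    recommended_file_size=4_000_000,  # known capability -> collected in caps_params
    loader_file_format="parquet",     # known config field -> collected in config_params
)
assert dest.config_params == {"loader_file_format": "parquet"}
assert dest.caps_params == {
    "naming_convention": "duck_case",
    "recommended_file_size": 4_000_000,
}

# capabilities are built by the factory (no more module-level capabilities())
caps = dest.capabilities()
assert caps.naming_convention == "duck_case"
assert caps.preferred_loader_file_format == "parquet"
assert caps.recommended_file_size == 4_000_000
```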
diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index 24b0928463..6ac9b8205e 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -1,10 +1,13 @@ +from typing import Dict import pytest from dlt.common.destination.reference import DestinationClientDwhConfiguration, Destination from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.exceptions import InvalidDestinationReference, UnknownDestinationModule from dlt.common.schema import Schema +from dlt.common.typing import is_subclass +from tests.common.configuration.utils import environment from tests.utils import ACTIVE_DESTINATIONS @@ -32,6 +35,96 @@ def test_custom_destination_module() -> None: ) # a full type name +def test_arguments_propagated_to_config() -> None: + dest = Destination.from_reference( + "dlt.destinations.duckdb", create_indexes=None, unknown_param="A" + ) + # None for create_indexes is not a default and it is passed on, unknown_param is removed because it is unknown + assert dest.config_params == {"create_indexes": None} + assert dest.caps_params == {} + + # test explicit config value being passed + import dlt + + dest = Destination.from_reference( + "dlt.destinations.duckdb", create_indexes=dlt.config.value, unknown_param="A" + ) + assert dest.config_params == {"create_indexes": dlt.config.value} + assert dest.caps_params == {} + + dest = Destination.from_reference( + "dlt.destinations.weaviate", naming_convention="duck_case", create_indexes=True + ) + # create indexes are not known + assert dest.config_params == {} + + # create explicit caps + dest = Destination.from_reference( + "dlt.destinations.dummy", + naming_convention="duck_case", + recommended_file_size=4000000, + loader_file_format="parquet", + ) + from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration + + assert dest.config_params == {"loader_file_format": "parquet"} + # loader_file_format is a legacy param that is duplicated as preferred_loader_file_format + assert dest.caps_params == { + "naming_convention": "duck_case", + "recommended_file_size": 4000000, + } + # instantiate configs + caps = dest.capabilities() + assert caps.naming_convention == "duck_case" + assert caps.preferred_loader_file_format == "parquet" + assert caps.recommended_file_size == 4000000 + init_config = DummyClientConfiguration() + config = dest.configuration(init_config) + assert config.loader_file_format == "parquet" # type: ignore[attr-defined] + + +def test_factory_config_injection(environment: Dict[str, str]) -> None: + environment["DESTINATION__LOADER_FILE_FORMAT"] = "parquet" + from dlt.destinations import dummy + + # caps will resolve from config without client + assert dummy().capabilities().preferred_loader_file_format == "parquet" + + caps = dummy().client(Schema("client")).capabilities + assert caps.preferred_loader_file_format == "parquet" + + environment.clear() + caps = dummy().client(Schema("client")).capabilities + assert caps.preferred_loader_file_format == "jsonl" + + environment["DESTINATION__DUMMY__LOADER_FILE_FORMAT"] = "parquet" + environment["DESTINATION__DUMMY__FAIL_PROB"] = "0.435" + + # config will partially resolve without client + config = dummy().configuration(None, accept_partial=True) + assert config.fail_prob == 0.435 + assert config.loader_file_format == "parquet" + + dummy_ = dummy().client(Schema("client")) + assert dummy_.capabilities.preferred_loader_file_format == "parquet" + assert dummy_.config.fail_prob == 0.435 + + # 
test named destination + environment.clear() + import os + from dlt.destinations import filesystem + from dlt.destinations.impl.filesystem.configuration import ( + FilesystemDestinationClientConfiguration, + ) + + filesystem_ = filesystem(destination_name="local") + abs_path = os.path.abspath("_storage") + environment["DESTINATION__LOCAL__BUCKET_URL"] = abs_path + init_config = FilesystemDestinationClientConfiguration()._bind_dataset_name(dataset_name="test") + configured_bucket_url = filesystem_.client(Schema("test"), init_config).config.bucket_url + assert configured_bucket_url.endswith("_storage") + + def test_import_module_by_path() -> None: # importing works directly from dlt destinations dest = Destination.from_reference("dlt.destinations.postgres") @@ -54,17 +147,7 @@ def test_import_module_by_path() -> None: def test_import_all_destinations() -> None: # this must pass without the client dependencies being imported for dest_type in ACTIVE_DESTINATIONS: - # generic destination needs a valid callable, otherwise instantiation will fail - additional_args = {} - if dest_type == "destination": - - def dest_callable(items, table) -> None: - pass - - additional_args["destination_callable"] = dest_callable - dest = Destination.from_reference( - dest_type, None, dest_type + "_name", "production", **additional_args - ) + dest = Destination.from_reference(dest_type, None, dest_type + "_name", "production") assert dest.destination_type == "dlt.destinations." + dest_type assert dest.destination_name == dest_type + "_name" assert dest.config_params["environment"] == "production" @@ -73,6 +156,42 @@ def dest_callable(items, table) -> None: assert isinstance(dest.capabilities(), DestinationCapabilitiesContext) +def test_instantiate_all_factories() -> None: + from dlt import destinations + + impls = dir(destinations) + for impl in impls: + var_ = getattr(destinations, impl) + if not is_subclass(var_, Destination): + continue + dest = var_() + + assert dest.destination_name + assert dest.destination_type + # custom destination is named after the callable + if dest.destination_type != "dlt.destinations.destination": + assert dest.destination_type.endswith(dest.destination_name) + else: + assert dest.destination_name == "dummy_custom_destination" + assert dest.spec + assert dest.spec() + assert dest.capabilities() + # partial configuration may always be created + assert dest.configuration(None, accept_partial=True) + + mod_dest = var_( + destination_name="fake_name", environment="prod", naming_convention="duck_case" + ) + assert ( + mod_dest.config_params.items() + >= {"destination_name": "fake_name", "environment": "prod"}.items() + ) + assert mod_dest.caps_params == {"naming_convention": "duck_case"} + assert mod_dest.destination_name == "fake_name" + caps = mod_dest.capabilities() + assert caps.naming_convention == "duck_case" + + def test_import_destination_config() -> None: # importing destination by type will work dest = Destination.from_reference(ref="dlt.destinations.duckdb", environment="stage") @@ -97,6 +216,7 @@ def test_import_destination_config() -> None: ref="duckdb", destination_name="my_destination", environment="devel" ) assert dest.destination_type == "dlt.destinations.duckdb" + assert dest.destination_name == "my_destination" assert dest.config_params["environment"] == "devel" config = dest.configuration(dest.spec()._bind_dataset_name(dataset_name="dataset")) # type: ignore assert config.destination_type == "duckdb" From 81e0db992804311b971c0894c280203f8e6ec4d5 Mon Sep 17 00:00:00 
2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 22:45:51 +0200 Subject: [PATCH 066/105] fixes table builder tests --- dlt/extract/resource.py | 7 +++- dlt/load/load.py | 5 +-- tests/common/cases/destinations/null.py | 2 +- tests/common/configuration/test_inject.py | 14 ++++++- .../common/data_writers/test_data_writers.py | 6 +-- .../normalizers/test_import_normalizers.py | 6 +-- tests/destinations/test_custom_destination.py | 39 ++++++++++++++++--- .../athena_iceberg/test_athena_adapter.py | 2 +- .../test_bigquery_streaming_insert.py | 2 +- .../bigquery/test_bigquery_table_builder.py | 7 ++-- .../test_clickhouse_table_builder.py | 5 +-- tests/load/dremio/test_dremio_client.py | 5 +-- .../load/duckdb/test_duckdb_table_builder.py | 8 ++-- tests/load/mssql/test_mssql_table_builder.py | 5 +-- tests/load/pipeline/test_athena.py | 2 +- .../postgres/test_postgres_table_builder.py | 16 +++----- tests/load/qdrant/test_pipeline.py | 1 + .../redshift/test_redshift_table_builder.py | 5 +-- .../snowflake/test_snowflake_table_builder.py | 5 +-- .../synapse/test_synapse_table_builder.py | 8 ++-- .../synapse/test_synapse_table_indexing.py | 5 +-- tests/normalize/utils.py | 15 +++---- tests/pipeline/test_dlt_versions.py | 10 ++--- tests/pipeline/test_pipeline_extra.py | 13 ++++--- 24 files changed, 108 insertions(+), 85 deletions(-) diff --git a/dlt/extract/resource.py b/dlt/extract/resource.py index eecb570375..93eb9d1189 100644 --- a/dlt/extract/resource.py +++ b/dlt/extract/resource.py @@ -1,4 +1,3 @@ -from copy import deepcopy import inspect from functools import partial from typing import ( @@ -14,6 +13,7 @@ ) from typing_extensions import TypeVar, Self +from dlt.common import logger from dlt.common.configuration.inject import get_fun_spec, with_config from dlt.common.configuration.resolve import inject_section from dlt.common.configuration.specs import known_sections @@ -394,6 +394,11 @@ def _gen_wrap(gen: TPipeStep) -> TPipeStep: else: # keep function as function to not evaluate generators before pipe starts self._pipe.replace_gen(partial(_gen_wrap, gen)) + else: + logger.warning( + f"Setting add_limit to a transformer {self.name} has no effect. Set the limit on" + " the top level resource." 
+ ) return self def parallelize(self: TDltResourceImpl) -> TDltResourceImpl: diff --git a/dlt/load/load.py b/dlt/load/load.py index 8c7eb431e8..5d049a45d7 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -75,7 +75,6 @@ def __init__( self.initial_client_config = initial_client_config self.initial_staging_client_config = initial_staging_client_config self.destination = destination - self.capabilities = destination.capabilities() self.staging_destination = staging_destination self.pool = NullExecutor() self.load_storage: LoadStorage = self.create_storage(is_storage_owner) @@ -83,7 +82,7 @@ def __init__( super().__init__() def create_storage(self, is_storage_owner: bool) -> LoadStorage: - supported_file_formats = self.capabilities.supported_loader_file_formats + supported_file_formats = self.destination.capabilities().supported_loader_file_formats if self.staging_destination: supported_file_formats = ( self.staging_destination.capabilities().supported_loader_file_formats @@ -145,7 +144,7 @@ def w_spool_job( if job_info.file_format not in self.load_storage.supported_job_file_formats: raise LoadClientUnsupportedFileFormats( job_info.file_format, - self.capabilities.supported_loader_file_formats, + self.destination.capabilities().supported_loader_file_formats, file_path, ) logger.info(f"Will load file {file_path} with table name {job_info.table_name}") diff --git a/tests/common/cases/destinations/null.py b/tests/common/cases/destinations/null.py index b2054cd7e8..37e87d89cf 100644 --- a/tests/common/cases/destinations/null.py +++ b/tests/common/cases/destinations/null.py @@ -14,7 +14,7 @@ def __init__(self, **kwargs: Any) -> None: spec = DestinationClientConfiguration - def capabilities(self) -> DestinationCapabilitiesContext: + def _raw_capabilities(self) -> DestinationCapabilitiesContext: return DestinationCapabilitiesContext.generic_capabilities() @property diff --git a/tests/common/configuration/test_inject.py b/tests/common/configuration/test_inject.py index f0494e9898..13d68b53e9 100644 --- a/tests/common/configuration/test_inject.py +++ b/tests/common/configuration/test_inject.py @@ -570,7 +570,19 @@ def get_cf(aux: str = dlt.config.value, last_config: AuxTest = None): def test_inject_spec_into_argument_with_spec_type() -> None: # if signature contains argument with type of SPEC, it gets injected there - from dlt.destinations.impl.dummy import _configure, DummyClientConfiguration + import dlt + from dlt.common.configuration import known_sections + from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration + + @with_config( + spec=DummyClientConfiguration, + sections=( + known_sections.DESTINATION, + "dummy", + ), + ) + def _configure(config: DummyClientConfiguration = dlt.config.value) -> DummyClientConfiguration: + return config # _configure has argument of type DummyClientConfiguration that it returns # this type holds resolved configuration diff --git a/tests/common/data_writers/test_data_writers.py b/tests/common/data_writers/test_data_writers.py index 978fcc283b..cd5ff2eaf8 100644 --- a/tests/common/data_writers/test_data_writers.py +++ b/tests/common/data_writers/test_data_writers.py @@ -7,8 +7,6 @@ from dlt.common.data_writers.exceptions import DataWriterNotFound, SpecLookupFailed from dlt.common.typing import AnyFun -# from dlt.destinations.postgres import capabilities -from dlt.destinations.impl.redshift import capabilities as redshift_caps from dlt.common.data_writers.escape import ( escape_redshift_identifier, escape_hive_identifier, @@ -51,8 +49,10 @@ 
class _BytesIOWriter(DataWriter): @pytest.fixture def insert_writer() -> Iterator[DataWriter]: + from dlt.destinations import redshift + with io.StringIO() as f: - yield InsertValuesWriter(f, caps=redshift_caps()) + yield InsertValuesWriter(f, caps=redshift().capabilities()) @pytest.fixture diff --git a/tests/common/normalizers/test_import_normalizers.py b/tests/common/normalizers/test_import_normalizers.py index 065aa4dd3e..13bd1fcd3c 100644 --- a/tests/common/normalizers/test_import_normalizers.py +++ b/tests/common/normalizers/test_import_normalizers.py @@ -82,6 +82,6 @@ def test_import_invalid_naming_module() -> None: import_normalizers(explicit_normalizers("dlt.common.tests")) assert py_ex.value.naming_module == "dlt.common.tests" with pytest.raises(InvalidNamingModule) as py_ex2: - import_normalizers(explicit_normalizers("dlt.pipeline")) - assert py_ex2.value.naming_module == "dlt" - assert py_ex2.value.naming_class == "pipeline" + import_normalizers(explicit_normalizers("dlt.pipeline.helpers")) + assert py_ex2.value.naming_module == "dlt.pipeline" + assert py_ex2.value.naming_class == "helpers" diff --git a/tests/destinations/test_custom_destination.py b/tests/destinations/test_custom_destination.py index 6834006689..1d1bb3d96c 100644 --- a/tests/destinations/test_custom_destination.py +++ b/tests/destinations/test_custom_destination.py @@ -8,12 +8,13 @@ from copy import deepcopy from dlt.common.configuration.specs.base_configuration import configspec +from dlt.common.schema.schema import Schema from dlt.common.typing import TDataItems from dlt.common.schema import TTableSchema from dlt.common.data_writers.writers import TLoaderFileFormat from dlt.common.destination.reference import Destination from dlt.common.destination.exceptions import InvalidDestinationReference -from dlt.common.configuration.exceptions import ConfigFieldMissingException +from dlt.common.configuration.exceptions import ConfigFieldMissingException, ConfigurationValueError from dlt.common.configuration.specs import ConnectionStringCredentials from dlt.common.configuration.inject import get_fun_spec from dlt.common.configuration.specs import BaseConfiguration @@ -38,7 +39,7 @@ def _run_through_sink( batch_size: int = 10, ) -> List[Tuple[TDataItems, TTableSchema]]: """ - runs a list of items through the sink destination and returns colleceted calls + runs a list of items through the sink destination and returns collected calls """ calls: List[Tuple[TDataItems, TTableSchema]] = [] @@ -126,6 +127,34 @@ def global_sink_func(items: TDataItems, table: TTableSchema) -> None: global_calls.append((items, table)) +def test_capabilities() -> None: + # test default caps + dest = dlt.destination()(global_sink_func)() + caps = dest.capabilities() + assert caps.preferred_loader_file_format == "typed-jsonl" + assert caps.supported_loader_file_formats == ["typed-jsonl", "parquet"] + assert caps.naming_convention == "direct" + assert caps.max_table_nesting == 0 + client_caps = dest.client(Schema("schema")).capabilities + assert dict(caps) == dict(client_caps) + + # test modified caps + dest = dlt.destination( + loader_file_format="parquet", + batch_size=0, + name="my_name", + naming_convention="snake_case", + max_table_nesting=10, + )(global_sink_func)() + caps = dest.capabilities() + assert caps.preferred_loader_file_format == "parquet" + assert caps.supported_loader_file_formats == ["typed-jsonl", "parquet"] + assert caps.naming_convention == "snake_case" + assert caps.max_table_nesting == 10 + client_caps = 
dest.client(Schema("schema")).capabilities + assert dict(caps) == dict(client_caps) + + def test_instantiation() -> None: # also tests _DESTINATIONS calls: List[Tuple[TDataItems, TTableSchema]] = [] @@ -144,7 +173,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va p.run([1, 2, 3], table_name="items") assert len(calls) == 1 # local func does not create entry in destinations - assert not _DESTINATIONS + assert "local_sink_func" not in _DESTINATIONS # test passing via from_reference calls = [] @@ -156,7 +185,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va p.run([1, 2, 3], table_name="items") assert len(calls) == 1 # local func does not create entry in destinations - assert not _DESTINATIONS + assert "local_sink_func" not in _DESTINATIONS # test passing string reference global global_calls @@ -184,7 +213,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va destination=Destination.from_reference("destination", destination_callable=None), full_refresh=True, ) - with pytest.raises(PipelineStepFailed): + with pytest.raises(ConfigurationValueError): p.run([1, 2, 3], table_name="items") # pass invalid string reference will fail on instantiation diff --git a/tests/load/athena_iceberg/test_athena_adapter.py b/tests/load/athena_iceberg/test_athena_adapter.py index 3144eb9cc9..d14c0bdd34 100644 --- a/tests/load/athena_iceberg/test_athena_adapter.py +++ b/tests/load/athena_iceberg/test_athena_adapter.py @@ -2,7 +2,7 @@ import dlt from dlt.destinations import filesystem -from dlt.destinations.impl.athena.athena_adapter import athena_adapter, athena_partition +from dlt.destinations.adapters import athena_adapter, athena_partition # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/bigquery/test_bigquery_streaming_insert.py b/tests/load/bigquery/test_bigquery_streaming_insert.py index c80f6ed65a..391bf4095e 100644 --- a/tests/load/bigquery/test_bigquery_streaming_insert.py +++ b/tests/load/bigquery/test_bigquery_streaming_insert.py @@ -1,7 +1,7 @@ import pytest import dlt -from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter +from dlt.destinations.adapters import bigquery_adapter from tests.pipeline.utils import assert_load_info diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index a38b1746f4..e6fa936da8 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -23,9 +23,9 @@ from dlt.common.utils import uniq_id from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate -from dlt.destinations.impl.bigquery import capabilities +from dlt.destinations import bigquery from dlt.destinations.impl.bigquery.bigquery import BigQueryClient -from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter +from dlt.destinations.adapters import bigquery_adapter from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.extract import DltResource @@ -62,12 +62,11 @@ def gcp_client(empty_schema: Schema) -> BigQueryClient: creds = GcpServiceAccountCredentials() creds.project_id = "test_project_id" # noinspection PydanticTypeChecker - return BigQueryClient( + return bigquery().client( empty_schema, BigQueryClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name=f"test_{uniq_id()}" ), - capabilities(), ) diff --git 
a/tests/load/clickhouse/test_clickhouse_table_builder.py b/tests/load/clickhouse/test_clickhouse_table_builder.py index 653ca33c38..867102dde9 100644 --- a/tests/load/clickhouse/test_clickhouse_table_builder.py +++ b/tests/load/clickhouse/test_clickhouse_table_builder.py @@ -7,7 +7,7 @@ from dlt.common.utils import custom_environ, digest128 from dlt.common.utils import uniq_id -from dlt.destinations.impl.clickhouse import capabilities +from dlt.destinations import clickhouse from dlt.destinations.impl.clickhouse.clickhouse import ClickHouseClient from dlt.destinations.impl.clickhouse.configuration import ( ClickHouseCredentials, @@ -20,10 +20,9 @@ def clickhouse_client(empty_schema: Schema) -> ClickHouseClient: # Return a client without opening connection. creds = ClickHouseCredentials() - return ClickHouseClient( + return clickhouse().client( empty_schema, ClickHouseClientConfiguration(credentials=creds)._bind_dataset_name(f"test_{uniq_id()}"), - capabilities(), ) diff --git a/tests/load/dremio/test_dremio_client.py b/tests/load/dremio/test_dremio_client.py index a690472f97..ebcf2c5111 100644 --- a/tests/load/dremio/test_dremio_client.py +++ b/tests/load/dremio/test_dremio_client.py @@ -2,7 +2,7 @@ from dlt.common.schema import TColumnSchema, Schema -from dlt.destinations.impl.dremio import capabilities +from dlt.destinations import dremio from dlt.destinations.impl.dremio.configuration import DremioClientConfiguration, DremioCredentials from dlt.destinations.impl.dremio.dremio import DremioClient from tests.load.utils import empty_schema @@ -12,12 +12,11 @@ def dremio_client(empty_schema: Schema) -> DremioClient: creds = DremioCredentials() creds.database = "test_database" - return DremioClient( + return dremio().client( empty_schema, DremioClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_dataset" ), - capabilities(), ) diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py index b492fa1747..85f86ce84d 100644 --- a/tests/load/duckdb/test_duckdb_table_builder.py +++ b/tests/load/duckdb/test_duckdb_table_builder.py @@ -5,7 +5,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema -from dlt.destinations.impl.duckdb import capabilities +from dlt.destinations import duckdb from dlt.destinations.impl.duckdb.duck import DuckDbClient from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration @@ -23,10 +23,9 @@ @pytest.fixture def client(empty_schema: Schema) -> DuckDbClient: # return client without opening connection - return DuckDbClient( + return duckdb().client( empty_schema, DuckDbClientConfiguration()._bind_dataset_name(dataset_name="test_" + uniq_id()), - capabilities(), ) @@ -119,12 +118,11 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: assert '"col4" TIMESTAMP WITH TIME ZONE NOT NULL' in sql # same thing with indexes - client = DuckDbClient( + client = duckdb().client( client.schema, DuckDbClientConfiguration(create_indexes=True)._bind_dataset_name( dataset_name="test_" + uniq_id() ), - capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql) diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py index 79126572fb..d6cf3ec3e8 100644 --- a/tests/load/mssql/test_mssql_table_builder.py +++ b/tests/load/mssql/test_mssql_table_builder.py @@ -6,7 +6,7 @@ pytest.importorskip("dlt.destinations.impl.mssql.mssql", reason="MSSQL ODBC driver not 
installed") -from dlt.destinations.impl.mssql import capabilities +from dlt.destinations import mssql from dlt.destinations.impl.mssql.mssql import MsSqlJobClient from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials @@ -19,12 +19,11 @@ @pytest.fixture def client(empty_schema: Schema) -> MsSqlJobClient: # return client without opening connection - return MsSqlJobClient( + return mssql().client( empty_schema, MsSqlClientConfiguration(credentials=MsSqlCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), - capabilities(), ) diff --git a/tests/load/pipeline/test_athena.py b/tests/load/pipeline/test_athena.py index 272cc701d5..4234b55fc1 100644 --- a/tests/load/pipeline/test_athena.py +++ b/tests/load/pipeline/test_athena.py @@ -9,7 +9,7 @@ from tests.pipeline.utils import assert_load_info, load_table_counts from tests.pipeline.utils import load_table_counts from dlt.destinations.exceptions import CantExtractTablePrefix -from dlt.destinations.impl.athena.athena_adapter import athena_partition, athena_adapter +from dlt.destinations.adapters import athena_partition, athena_adapter from dlt.destinations.fs_client import FSClientBase from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index af8f96a907..5ba68be67c 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -5,9 +5,8 @@ from dlt.common.exceptions import TerminalValueError from dlt.common.utils import uniq_id from dlt.common.schema import Schema, utils -from dlt.common.destination import Destination -from dlt.destinations.impl.postgres import capabilities +from dlt.destinations import postgres from dlt.destinations.impl.postgres.postgres import PostgresClient from dlt.destinations.impl.postgres.configuration import ( PostgresClientConfiguration, @@ -43,11 +42,7 @@ def create_client(empty_schema: Schema) -> PostgresClient: config = PostgresClientConfiguration(credentials=PostgresCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ) - return PostgresClient( - empty_schema, - config, - Destination.adjust_capabilities(capabilities(), config, empty_schema.naming), - ) + return postgres().client(empty_schema, config) def test_create_table(client: PostgresClient) -> None: @@ -118,7 +113,7 @@ def test_alter_table(client: PostgresClient) -> None: assert '"col11_precision" time (3) without time zone NOT NULL' in sql -def test_create_table_with_hints(client: PostgresClient) -> None: +def test_create_table_with_hints(client: PostgresClient, empty_schema: Schema) -> None: mod_update = deepcopy(TABLE_UPDATE) # timestamp mod_update[0]["primary_key"] = True @@ -135,13 +130,12 @@ def test_create_table_with_hints(client: PostgresClient) -> None: assert '"col4" timestamp with time zone NOT NULL' in sql # same thing without indexes - client = PostgresClient( - client.schema, + client = postgres().client( + empty_schema, PostgresClientConfiguration( create_indexes=False, credentials=PostgresCredentials(), )._bind_dataset_name(dataset_name="test_" + uniq_id()), - capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql, dialect="postgres") diff --git a/tests/load/qdrant/test_pipeline.py b/tests/load/qdrant/test_pipeline.py index b8101053fc..e0bd1fff97 100644 --- a/tests/load/qdrant/test_pipeline.py +++ 
b/tests/load/qdrant/test_pipeline.py @@ -5,6 +5,7 @@ from dlt.common import json from dlt.common.utils import uniq_id +from dlt.destinations.adapters import qdrant_adapter from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter, VECTORIZE_HINT from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient from tests.pipeline.utils import assert_load_info diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py index b7557f8bc9..de6f450134 100644 --- a/tests/load/redshift/test_redshift_table_builder.py +++ b/tests/load/redshift/test_redshift_table_builder.py @@ -6,7 +6,7 @@ from dlt.common.schema import Schema from dlt.common.configuration import resolve_configuration -from dlt.destinations.impl.redshift import capabilities +from dlt.destinations import redshift from dlt.destinations.impl.redshift.redshift import RedshiftClient from dlt.destinations.impl.redshift.configuration import ( RedshiftClientConfiguration, @@ -22,12 +22,11 @@ @pytest.fixture def client(empty_schema: Schema) -> RedshiftClient: # return client without opening connection - return RedshiftClient( + return redshift().client( empty_schema, RedshiftClientConfiguration(credentials=RedshiftCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), - capabilities(), ) diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py index 194b6bb6fb..4bb69085da 100644 --- a/tests/load/snowflake/test_snowflake_table_builder.py +++ b/tests/load/snowflake/test_snowflake_table_builder.py @@ -5,7 +5,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema -from dlt.destinations.impl.snowflake import capabilities +from dlt.destinations import snowflake from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient from dlt.destinations.impl.snowflake.configuration import ( SnowflakeClientConfiguration, @@ -22,12 +22,11 @@ def snowflake_client(empty_schema: Schema) -> SnowflakeClient: # return client without opening connection creds = SnowflakeCredentials() - return SnowflakeClient( + return snowflake().client( empty_schema, SnowflakeClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_" + uniq_id() ), - capabilities(), ) diff --git a/tests/load/synapse/test_synapse_table_builder.py b/tests/load/synapse/test_synapse_table_builder.py index 6e3e69b3a3..1a92a20f1e 100644 --- a/tests/load/synapse/test_synapse_table_builder.py +++ b/tests/load/synapse/test_synapse_table_builder.py @@ -7,7 +7,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema, TColumnHint -from dlt.destinations.impl.synapse import capabilities +from dlt.destinations import synapse from dlt.destinations.impl.synapse.synapse import ( SynapseClient, HINT_TO_SYNAPSE_ATTR, @@ -27,12 +27,11 @@ @pytest.fixture def client(empty_schema: Schema) -> SynapseClient: # return client without opening connection - client = SynapseClient( + client = synapse().client( empty_schema, SynapseClientConfiguration(credentials=SynapseCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), - capabilities(), ) assert client.config.create_indexes is False return client @@ -41,12 +40,11 @@ def client(empty_schema: Schema) -> SynapseClient: @pytest.fixture def client_with_indexes_enabled(empty_schema: Schema) -> SynapseClient: # return client without opening connection - client = SynapseClient( + client = synapse().client( empty_schema, 
SynapseClientConfiguration( credentials=SynapseCredentials(), create_indexes=True )._bind_dataset_name(dataset_name="test_" + uniq_id()), - capabilities(), ) assert client.config.create_indexes is True return client diff --git a/tests/load/synapse/test_synapse_table_indexing.py b/tests/load/synapse/test_synapse_table_indexing.py index a9d426ad4a..b3a077dd7f 100644 --- a/tests/load/synapse/test_synapse_table_indexing.py +++ b/tests/load/synapse/test_synapse_table_indexing.py @@ -1,14 +1,11 @@ import os import pytest from typing import Iterator, List, Any, Union -from textwrap import dedent import dlt from dlt.common.schema import TColumnSchema -from dlt.destinations.sql_client import SqlClientBase - -from dlt.destinations.impl.synapse import synapse_adapter +from dlt.destinations.adapters import synapse_adapter from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES diff --git a/tests/normalize/utils.py b/tests/normalize/utils.py index 0ce099d4b6..dffb3f1bb6 100644 --- a/tests/normalize/utils.py +++ b/tests/normalize/utils.py @@ -1,15 +1,10 @@ -from typing import Mapping, cast +from dlt.destinations import duckdb, redshift, postgres, bigquery, filesystem -from dlt.destinations.impl.duckdb import capabilities as duck_insert_caps -from dlt.destinations.impl.redshift import capabilities as rd_insert_caps -from dlt.destinations.impl.postgres import capabilities as pg_insert_caps -from dlt.destinations.impl.bigquery import capabilities as jsonl_caps -from dlt.destinations.impl.filesystem import capabilities as filesystem_caps - -DEFAULT_CAPS = pg_insert_caps -INSERT_CAPS = [duck_insert_caps, rd_insert_caps, pg_insert_caps] -JSONL_CAPS = [jsonl_caps, filesystem_caps] +# callables to capabilities +DEFAULT_CAPS = postgres().capabilities +INSERT_CAPS = [duckdb().capabilities, redshift().capabilities, DEFAULT_CAPS] +JSONL_CAPS = [bigquery().capabilities, filesystem().capabilities] ALL_CAPABILITIES = INSERT_CAPS + JSONL_CAPS diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 7628c6d358..e4eb9e23a4 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -20,13 +20,13 @@ TStoredSchema, ) from dlt.common.configuration.resolve import resolve_configuration -from dlt.destinations.impl.duckdb import capabilities +from dlt.destinations import duckdb from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient from tests.utils import TEST_STORAGE_ROOT, test_storage -if sys.version_info > (3, 11): +if sys.version_info >= (3, 12): pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) @@ -77,7 +77,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: sections=("destination", "duckdb"), ) with DuckDbSqlClient( - GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + GITHUB_DATASET, duckdb_cfg.credentials, duckdb().capabilities() ) as client: rows = client.execute_sql(f"SELECT * FROM {LOADS_TABLE_NAME}") # make sure we have just 4 columns @@ -130,7 +130,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: assert "_version_hash" in state_dict with DuckDbSqlClient( - GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + GITHUB_DATASET, duckdb_cfg.credentials, duckdb().capabilities() ) as client: rows = client.execute_sql( f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at" @@ -212,7 +212,7 @@ 
def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: sections=("destination", "duckdb"), ) with DuckDbSqlClient( - GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + GITHUB_DATASET, duckdb_cfg.credentials, duckdb().capabilities() ) as client: rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 70 diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py index 7208216c9f..2a39865b8a 100644 --- a/tests/pipeline/test_pipeline_extra.py +++ b/tests/pipeline/test_pipeline_extra.py @@ -40,7 +40,11 @@ class BaseModel: # type: ignore[no-redef] @pytest.mark.parametrize( - "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name + "destination_config", + destinations_configs( + default_sql_configs=True, default_vector_configs=True, local_filesystem_configs=True + ), + ids=lambda x: x.name, ) def test_create_pipeline_all_destinations(destination_config: DestinationTestConfiguration) -> None: # create pipelines, extract and normalize. that should be possible without installing any dependencies @@ -51,11 +55,8 @@ def test_create_pipeline_all_destinations(destination_config: DestinationTestCon ) # are capabilities injected caps = p._container[DestinationCapabilitiesContext] - print(caps.naming_convention) - # are right naming conventions created - assert p._default_naming.max_length == min( - caps.max_column_identifier_length, caps.max_identifier_length - ) + assert p.naming.name() == caps.naming_convention + p.extract([1, "2", 3], table_name="data") # is default schema with right naming convention assert p.default_schema.naming.max_length == min( From 8a32793f584f94db4703687f4a480218a2bb39de Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 15 Jun 2024 23:05:25 +0200 Subject: [PATCH 067/105] fix deps and docs --- docs/website/docs/general-usage/schema.md | 6 +++--- tests/common/test_destination.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index 0d41483ca1..7727d7b8a3 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -239,7 +239,7 @@ settings: ``` Alternatively you can add and remove detections from code: -```python +```py source = source() # remove iso time detector source.schema.remove_type_detection("iso_timestamp") @@ -280,7 +280,7 @@ settings: - re:_timestamp$ ``` Above we add `partition` hint to all columns ending with `_timestamp`. You can do same thing in the code -```python +```py source = source() # this will update existing hints with the hints passed source.schema.merge_hints({"partition": ["re:_timestamp$"]}) @@ -306,7 +306,7 @@ settings: Above we prefer `timestamp` data type for all columns containing **timestamp** substring and define a few exact matches ie. **created_at**. 
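To double-check that such settings were picked up, you can dump the live schema of a source to YAML and inspect its `settings` section (a minimal sketch; `source` stands for your own `@dlt.source`-decorated function):
```py
s = source()
# preferred data types and column hints show up under "settings" in the YAML output
print(s.schema.to_pretty_yaml())
```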
Here's same thing in code -```python +```py source = source() source.schema.update_preferred_types( { diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index 6ac9b8205e..2c690d94bb 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -175,9 +175,11 @@ def test_instantiate_all_factories() -> None: assert dest.destination_name == "dummy_custom_destination" assert dest.spec assert dest.spec() - assert dest.capabilities() # partial configuration may always be created - assert dest.configuration(None, accept_partial=True) + init_config = dest.spec.credentials_type()() + init_config.__is_resolved__ = True + assert dest.configuration(init_config, accept_partial=True) + assert dest.capabilities() mod_dest = var_( destination_name="fake_name", environment="prod", naming_convention="duck_case" From 0dc6dc838f6c114a3531cd50001cbf53ee388514 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 16 Jun 2024 11:07:31 +0200 Subject: [PATCH 068/105] fixes more tests --- docs/website/docs/general-usage/schema.md | 6 +++--- tests/load/dremio/test_dremio_client.py | 8 ++++---- .../filesystem/test_object_store_rs_credentials.py | 10 +++++++--- tests/load/pipeline/test_merge_disposition.py | 8 +++++--- .../pipeline/test_write_disposition_changes.py | 8 ++++++-- tests/load/test_job_client.py | 10 ++++++---- tests/load/utils.py | 14 +++++++++++++- 7 files changed, 44 insertions(+), 20 deletions(-) diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index 7727d7b8a3..0e3e3bba1f 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -240,7 +240,7 @@ settings: Alternatively you can add and remove detections from code: ```py - source = source() + source = data_source() # remove iso time detector source.schema.remove_type_detection("iso_timestamp") # convert UNIX timestamp (float, withing a year from NOW) into timestamp @@ -281,7 +281,7 @@ settings: ``` Above we add `partition` hint to all columns ending with `_timestamp`. You can do same thing in the code ```py - source = source() + source = data_source() # this will update existing hints with the hints passed source.schema.merge_hints({"partition": ["re:_timestamp$"]}) ``` @@ -307,7 +307,7 @@ settings: Above we prefer `timestamp` data type for all columns containing **timestamp** substring and define a few exact matches ie. **created_at**. 
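The snippets in this section call a `data_source()` function that is assumed to be defined elsewhere; a minimal, purely illustrative definition could look like this:
```py
import dlt

@dlt.source
def data_source():
    # a single resource with a timestamp-like column, enough to exercise the settings above
    return dlt.resource([{"created_at": "2024-05-01T00:00:00Z"}], name="events")
```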
Here's same thing in code ```py - source = source() + source = data_source() source.schema.update_preferred_types( { "re:timestamp": "timestamp", diff --git a/tests/load/dremio/test_dremio_client.py b/tests/load/dremio/test_dremio_client.py index ebcf2c5111..efc72c0652 100644 --- a/tests/load/dremio/test_dremio_client.py +++ b/tests/load/dremio/test_dremio_client.py @@ -12,11 +12,11 @@ def dremio_client(empty_schema: Schema) -> DremioClient: creds = DremioCredentials() creds.database = "test_database" - return dremio().client( + # ignore any configured values + creds.resolve() + return dremio(credentials=creds).client( empty_schema, - DremioClientConfiguration(credentials=creds)._bind_dataset_name( - dataset_name="test_dataset" - ), + DremioClientConfiguration()._bind_dataset_name(dataset_name="test_dataset"), ) diff --git a/tests/load/filesystem/test_object_store_rs_credentials.py b/tests/load/filesystem/test_object_store_rs_credentials.py index 4e43b7c5d8..524cd4425d 100644 --- a/tests/load/filesystem/test_object_store_rs_credentials.py +++ b/tests/load/filesystem/test_object_store_rs_credentials.py @@ -29,9 +29,11 @@ FS_CREDS: Dict[str, Any] = dlt.secrets.get("destination.filesystem.credentials") -assert ( - FS_CREDS is not None -), "`destination.filesystem.credentials` must be configured for these tests." +if FS_CREDS is None: + pytest.skip( + msg="`destination.filesystem.credentials` must be configured for these tests.", + allow_module_level=True, + ) def can_connect(bucket_url: str, object_store_rs_credentials: Dict[str, str]) -> bool: @@ -86,6 +88,7 @@ def test_aws_object_store_rs_credentials() -> None: creds = AwsCredentials( aws_access_key_id=FS_CREDS["aws_access_key_id"], aws_secret_access_key=FS_CREDS["aws_secret_access_key"], + # region_name must be configured in order for data lake to work region_name=FS_CREDS["region_name"], ) assert creds.aws_session_token is None @@ -138,6 +141,7 @@ def test_gcp_object_store_rs_credentials() -> None: creds = GcpServiceAccountCredentialsWithoutDefaults( project_id=FS_CREDS["project_id"], private_key=FS_CREDS["private_key"], + # private_key_id must be configured in order for data lake to work private_key_id=FS_CREDS["private_key_id"], client_email=FS_CREDS["client_email"], ) diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index 9b2010d1fc..a68e81ca97 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -18,6 +18,7 @@ from dlt.sources.helpers.transform import skip_first, take_first from dlt.pipeline.exceptions import PipelineStepFailed +from tests.load.utils import normalize_storage_table_cols from tests.pipeline.utils import assert_load_info, load_table_counts, select_data from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration @@ -307,9 +308,10 @@ def test_merge_keys_non_existing_columns(destination_config: DestinationTestConf github_2_counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables()]) assert github_2_counts["issues"] == 100 - 45 + 1 with p._sql_job_client(p.default_schema) as job_c: - _, table_schema = job_c.get_storage_table("issues") - assert "url" in table_schema - assert "m_a1" not in table_schema # unbound columns were not created + _, storage_cols = job_c.get_storage_table("issues") + storage_cols = normalize_storage_table_cols("issues", storage_cols, p.default_schema) + assert "url" in storage_cols + assert "m_a1" not in storage_cols # unbound columns 
were not created @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_write_disposition_changes.py b/tests/load/pipeline/test_write_disposition_changes.py index 16c589352e..b1703b4339 100644 --- a/tests/load/pipeline/test_write_disposition_changes.py +++ b/tests/load/pipeline/test_write_disposition_changes.py @@ -124,9 +124,13 @@ def source(): ) # schemaless destinations allow adding of root key without the pipeline failing - # for now this is only the case for dremio + # they do not mind adding NOT NULL columns to tables with existing data (id NOT NULL is supported at all) # doing this will result in somewhat useless behavior - destination_allows_adding_root_key = destination_config.destination in ["dremio", "clickhouse"] + destination_allows_adding_root_key = destination_config.destination in [ + "dremio", + "clickhouse", + "athena", + ] if destination_allows_adding_root_key and not with_root_key: pipeline.run( diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 999cea8553..769e32280e 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -5,7 +5,7 @@ from unittest.mock import patch import pytest import datetime # noqa: I251 -from typing import Iterator, Tuple, List, Dict, Any, Mapping, MutableMapping +from typing import Iterator, Tuple, List, Dict, Any from dlt.common import json, pendulum from dlt.common.schema import Schema @@ -41,6 +41,7 @@ cm_yield_client_with_storage, write_dataset, prepare_table, + normalize_storage_table_cols, ) from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration @@ -297,10 +298,11 @@ def test_schema_update_alter_table(client: SqlJobClientBase) -> None: assert len(schema_update[table_name]["columns"]) == 2 assert schema_update[table_name]["columns"]["col3"]["data_type"] == "double" assert schema_update[table_name]["columns"]["col4"]["data_type"] == "timestamp" - _, storage_table = client.get_storage_table(table_name) + _, storage_table_cols = client.get_storage_table(table_name) # 4 columns - assert len(storage_table) == 4 - assert storage_table["col4"]["data_type"] == "timestamp" + assert len(storage_table_cols) == 4 + storage_table_cols = normalize_storage_table_cols(table_name, storage_table_cols, schema) + assert storage_table_cols["col4"]["data_type"] == "timestamp" @pytest.mark.parametrize( diff --git a/tests/load/utils.py b/tests/load/utils.py index 8df1b9d738..eb40b8243f 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -26,7 +26,7 @@ from dlt.common.data_writers import DataWriter from dlt.common.schema import TTableSchemaColumns, Schema from dlt.common.storages import SchemaStorage, FileStorage, SchemaStorageConfiguration -from dlt.common.schema.utils import new_table +from dlt.common.schema.utils import new_table, normalize_table_identifiers from dlt.common.storages import ParsedLoadJobFileName, LoadStorage, PackageStorage from dlt.common.typing import StrAny from dlt.common.utils import uniq_id @@ -711,3 +711,15 @@ def sequence_generator() -> Generator[List[Dict[str, str]], None, None]: while True: yield [{"content": str(count + i)} for i in range(3)] count += 3 + + +def normalize_storage_table_cols( + table_name: str, cols: TTableSchemaColumns, schema: Schema +) -> TTableSchemaColumns: + """Normalize storage table columns back into schema naming""" + # go back to schema naming convention. 
this is a hack - will work here to + # reverse snowflake UPPER case folding + storage_table = normalize_table_identifiers( + new_table(table_name, columns=cols.values()), schema.naming # type: ignore[arg-type] + ) + return storage_table["columns"] From 4a39795721083ea8d0c2bc1efefff3ee8d5bd195 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 17 Jun 2024 20:15:23 +0200 Subject: [PATCH 069/105] case sensitivity docs stubs --- .../docs/dlt-ecosystem/destinations/redshift.md | 6 ++++++ docs/website/docs/general-usage/destination.md | 2 ++ .../docs/general-usage/naming-convention.md | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/redshift.md b/docs/website/docs/dlt-ecosystem/destinations/redshift.md index 7e0679ec6b..ab193c755d 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/redshift.md +++ b/docs/website/docs/dlt-ecosystem/destinations/redshift.md @@ -97,6 +97,12 @@ Amazon Redshift supports the following column hints: Redshift supports s3 as a file staging destination. dlt will upload files in the parquet format to s3 and ask Redshift to copy their data directly into the db. Please refer to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your s3 bucket with the bucket_url and credentials. The `dlt` Redshift loader will use the AWS credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively to parquet files, you can also specify jsonl as the staging file format. For this, set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. +## Identifier names and case sensitivity +* Up to 127 characters +* Case insensitive +* Stores identifiers in lower case +* Has case sensitive mode, if enabled you must [enable case sensitivity in destination factory](../../general-usage/destination.md#control-how-dlt-creates-table-column-and-other-identifiers) + ### Authentication IAM Role If you would like to load from s3 without forwarding the AWS staging credentials but authorize with an IAM role connected to Redshift, follow the [Redshift documentation](https://docs.aws.amazon.com/redshift/latest/mgmt/authorizing-redshift-service.html) to create a role with access to s3 linked to your Redshift cluster and change your destination settings to use the IAM role: diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index f8e39d2d08..0cce49390d 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -126,6 +126,8 @@ Obviously, dlt will access the destination when you instantiate [sql_client](../ 1. Redshift - always lower case, no matter which naming convention used. case insensitive 2. Athena - always lower case, no matter which naming convention used. uses different catalogue and query engines that are incompatible +### Enable case sensitive mode + ## Create new destination You have two ways to implement a new destination: diff --git a/docs/website/docs/general-usage/naming-convention.md b/docs/website/docs/general-usage/naming-convention.md index bb9eae9c89..71e6b2489c 100644 --- a/docs/website/docs/general-usage/naming-convention.md +++ b/docs/website/docs/general-usage/naming-convention.md @@ -8,12 +8,20 @@ keywords: [identifiers, snake case, ] `dlt` creates tables, child tables and column schemas from the data. The data being loaded, typically JSON documents, contains identifiers (i.e. 
key names in a dictionary) with any Unicode characters, any lengths and naming styles. On the other hand the destinations accept very strict -namespaces for their identifiers. Like Redshift that accepts case-insensitive alphanumeric +namespaces for their identifiers. Like [Redshift](../dlt-ecosystem/destinations/redshift.md#naming-convention) that accepts case-insensitive alphanumeric identifiers with maximum 127 characters. Each schema contains `naming convention` that tells `dlt` how to translate identifiers to the namespace that the destination understands. +* Each destination has a preferred naming convention. +* This naming convention is used when new schemas are created. +* Schemas preserve naming convention when saved +* `dlt` applies final naming convention in `normalize` stage. Naming convention comes from (1) explicit configuration (2) from destination capabilities. Naming convention +in schema will be ignored. +* You can change the naming convention in the capabilities: (name, case-folding, case sensitivity) + +## Case sensitivity ## Default naming convention (snake_case) @@ -35,10 +43,14 @@ namespace that the destination understands. > 💡 Use simple, short small caps identifiers for everything! +## Set and adjust naming convention explicitly -## Change naming convention +## Configure naming convention The naming convention is configurable and users can easily create their own conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e. DuckDB). +## Available naming conventions + +## Write your own naming convention From 43d6d5fa7ea073409ac306766202408393aaafaa Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 17 Jun 2024 20:15:56 +0200 Subject: [PATCH 070/105] fixes drop_pipeline fixture --- tests/load/pipeline/utils.py | 62 +++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py index d762029ddd..14a0dfb550 100644 --- a/tests/load/pipeline/utils.py +++ b/tests/load/pipeline/utils.py @@ -32,36 +32,38 @@ def drop_pipeline(request) -> Iterator[None]: def drop_active_pipeline_data() -> None: """Drops all the datasets for currently active pipeline, wipes the working folder and then deactivated it.""" if Container()[PipelineContext].is_active(): - # take existing pipeline - p = dlt.pipeline() + try: + # take existing pipeline + p = dlt.pipeline() - def _drop_dataset(schema_name: str) -> None: - with p.destination_client(schema_name) as client: - try: - client.drop_storage() - print("dropped") - except Exception as exc: - print(exc) - if isinstance(client, WithStagingDataset): - with client.with_staging_dataset(): - try: - client.drop_storage() - print("staging dropped") - except Exception as exc: - print(exc) + def _drop_dataset(schema_name: str) -> None: + with p.destination_client(schema_name) as client: + try: + client.drop_storage() + print("dropped") + except Exception as exc: + print(exc) + if isinstance(client, WithStagingDataset): + with client.with_staging_dataset(): + try: + client.drop_storage() + print("staging dropped") + except Exception as exc: + print(exc) - # drop_func = _drop_dataset_fs if _is_filesystem(p) else _drop_dataset_sql - # take all schemas and if destination was set - if p.destination: - if p.config.use_single_dataset: - # drop just the dataset for default schema - if p.default_schema_name: - _drop_dataset(p.default_schema_name) - else: - # for each schema, drop the dataset - for schema_name in p.schema_names: - 
_drop_dataset(schema_name) + # drop_func = _drop_dataset_fs if _is_filesystem(p) else _drop_dataset_sql + # take all schemas and if destination was set + if p.destination: + if p.config.use_single_dataset: + # drop just the dataset for default schema + if p.default_schema_name: + _drop_dataset(p.default_schema_name) + else: + # for each schema, drop the dataset + for schema_name in p.schema_names: + _drop_dataset(schema_name) - # p._wipe_working_folder() - # deactivate context - Container()[PipelineContext].deactivate() + # p._wipe_working_folder() + finally: + # always deactivate context, working directory will be wiped when the next test starts + Container()[PipelineContext].deactivate() From e3d998c34a00a005516d15e95d8ee924b7b2dad6 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 17 Jun 2024 20:16:17 +0200 Subject: [PATCH 071/105] improves partial config generation for capabilities --- dlt/common/destination/reference.py | 31 +++++++++++++--- dlt/destinations/impl/bigquery/bigquery.py | 1 - dlt/load/load.py | 4 +-- tests/common/storages/test_schema_storage.py | 2 -- tests/load/filesystem/test_aws_credentials.py | 6 ++++ .../load/filesystem/test_filesystem_client.py | 35 +++++++++++++++++++ tests/load/test_job_client.py | 6 ---- 7 files changed, 69 insertions(+), 16 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 110c9271a0..374dac146b 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -45,7 +45,6 @@ from dlt.common.storages import FileStorage from dlt.common.storages.load_storage import ParsedLoadJobFileName from dlt.common.storages.load_package import LoadJobInfo -from dlt.common.typing import get_all_types_of_class_in_union TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") @@ -96,8 +95,23 @@ def on_resolved(self) -> None: self.destination_name = self.destination_name or self.destination_type @classmethod - def credentials_type(cls) -> Type[CredentialsConfiguration]: - return extract_inner_hint(cls.get_resolvable_fields()["credentials"]) + def credentials_type( + cls, config: "DestinationClientConfiguration" = None + ) -> Type[CredentialsConfiguration]: + """Figure out credentials type, using hint resolvers for dynamic types + + For correct type resolution of filesystem, config should have bucket_url populated + """ + key = "credentials" + type_ = cls.get_resolvable_fields()[key] + if key in cls.__hint_resolvers__ and config is not None: + try: + # Type hint for this field is created dynamically + type_ = cls.__hint_resolvers__[key](config) + except Exception: + # we suppress failed hint resolutions + pass + return extract_inner_hint(type_) @configspec @@ -473,10 +487,17 @@ def capabilities( # get explicit config if final config not passed if config is None: # create mock credentials to avoid credentials being resolved - credentials = self.spec.credentials_type()() + init_config = self.spec() + init_config.update(self.config_params) + credentials = self.spec.credentials_type(init_config)() credentials.__is_resolved__ = True config = self.spec(credentials=credentials) - config = self.configuration(config, accept_partial=True) + try: + config = self.configuration(config, accept_partial=True) + except Exception: + # in rare cases partial may fail ie. 
when invalid native value is present + # in that case we fallback to "empty" config + pass return self.adjust_capabilities(caps, config, naming) @abstractmethod diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index d81b7e2c8e..c3a1be4174 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -373,7 +373,6 @@ def prepare_load_table( def get_storage_tables( self, table_names: Iterable[str] ) -> Iterable[Tuple[str, TTableSchemaColumns]]: - print(self.sql_client.fully_qualified_dataset_name()) """Gets table schemas from BigQuery using INFORMATION_SCHEMA or get_table for hidden datasets""" if not self.sql_client.is_hidden_dataset: return super().get_storage_tables(table_names) diff --git a/dlt/load/load.py b/dlt/load/load.py index 5d049a45d7..cf5618c024 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -351,7 +351,7 @@ def complete_package(self, load_id: str, schema: Schema, aborted: bool = False) ) ): job_client.complete_load(load_id) - self._maybe_trancate_staging_dataset(schema, job_client) + self._maybe_truncate_staging_dataset(schema, job_client) self.load_storage.complete_load_package(load_id, aborted) # collect package info @@ -504,7 +504,7 @@ def run(self, pool: Optional[Executor]) -> TRunMetrics: return TRunMetrics(False, len(self.load_storage.list_normalized_packages())) - def _maybe_trancate_staging_dataset(self, schema: Schema, job_client: JobClientBase) -> None: + def _maybe_truncate_staging_dataset(self, schema: Schema, job_client: JobClientBase) -> None: """ Truncate the staging dataset if one used, and configuration requests truncation. diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py index 383fa5d1ed..ffbd2ecf1b 100644 --- a/tests/common/storages/test_schema_storage.py +++ b/tests/common/storages/test_schema_storage.py @@ -115,11 +115,9 @@ def test_skip_import_if_not_modified(synced_storage: SchemaStorage, storage: Sch _, new_table = storage_schema.coerce_row("event_user", None, row) storage_schema.update_table(new_table) assert storage_schema.is_modified - print("SAVE SCHEMA") storage.save_schema(storage_schema) assert not storage_schema.is_modified # now use synced storage to load schema again - print("LOAD SCHEMA") reloaded_schema = synced_storage.load_schema("ethereum") # the schema was not overwritten assert "event_user" in reloaded_schema.tables diff --git a/tests/load/filesystem/test_aws_credentials.py b/tests/load/filesystem/test_aws_credentials.py index 1a41144744..28460f1ca3 100644 --- a/tests/load/filesystem/test_aws_credentials.py +++ b/tests/load/filesystem/test_aws_credentials.py @@ -1,6 +1,7 @@ import pytest from typing import Dict +from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration from dlt.common.utils import digest128 from dlt.common.configuration import resolve_configuration from dlt.common.configuration.specs.aws_credentials import AwsCredentials @@ -101,6 +102,11 @@ def test_aws_credentials_from_boto3(environment: Dict[str, str]) -> None: assert c.aws_access_key_id == "fake_access_key" +def test_aws_credentials_from_unknown_object() -> None: + with pytest.raises(InvalidBoto3Session): + AwsCredentials().parse_native_representation(CredentialsConfiguration()) + + def test_aws_credentials_for_profile(environment: Dict[str, str]) -> None: import botocore.exceptions diff --git a/tests/load/filesystem/test_filesystem_client.py 
b/tests/load/filesystem/test_filesystem_client.py index fbfd08271b..597d400344 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -2,13 +2,22 @@ import os from unittest import mock from pathlib import Path +from urllib.parse import urlparse import pytest +from dlt.common.configuration.specs.azure_credentials import AzureCredentials +from dlt.common.configuration.specs.base_configuration import ( + CredentialsConfiguration, + extract_inner_hint, +) +from dlt.common.schema.schema import Schema +from dlt.common.storages.configuration import FilesystemConfiguration from dlt.common.time import ensure_pendulum_datetime from dlt.common.utils import digest128, uniq_id from dlt.common.storages import FileStorage, ParsedLoadJobFileName +from dlt.destinations import filesystem from dlt.destinations.impl.filesystem.filesystem import ( FilesystemDestinationClientConfiguration, INIT_FILE_NAME, @@ -46,6 +55,32 @@ def test_filesystem_destination_configuration() -> None: ).fingerprint() == digest128("s3://cool") +def test_filesystem_factory_buckets(with_gdrive_buckets_env: str) -> None: + proto = urlparse(with_gdrive_buckets_env).scheme + credentials_type = extract_inner_hint( + FilesystemConfiguration.PROTOCOL_CREDENTIALS.get(proto, CredentialsConfiguration) + ) + + # test factory figuring out the right credentials + filesystem_ = filesystem(with_gdrive_buckets_env) + client = filesystem_.client( + Schema("test"), + initial_config=FilesystemDestinationClientConfiguration()._bind_dataset_name("test"), + ) + assert client.config.protocol == proto or "file" + assert isinstance(client.config.credentials, credentials_type) + assert issubclass(client.config.credentials_type(client.config), credentials_type) + assert filesystem_.capabilities() + + # factory gets initial credentials + filesystem_ = filesystem(with_gdrive_buckets_env, credentials=credentials_type()) + client = filesystem_.client( + Schema("test"), + initial_config=FilesystemDestinationClientConfiguration()._bind_dataset_name("test"), + ) + assert isinstance(client.config.credentials, credentials_type) + + @pytest.mark.parametrize("write_disposition", ("replace", "append", "merge")) @pytest.mark.parametrize("layout", TEST_FILE_LAYOUTS) def test_successful_load(write_disposition: str, layout: str, with_gdrive_buckets_env: str) -> None: diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 769e32280e..04a1175180 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -89,8 +89,6 @@ def test_get_schema_on_empty_storage(client: SqlJobClientBase) -> None: ) def test_get_update_basic_schema(client: SqlJobClientBase) -> None: schema = client.schema - print(schema.stored_version) - print(schema.version) schema_update = client.update_stored_schema() # expect dlt tables in schema update assert set(schema_update.keys()) == {VERSION_TABLE_NAME, LOADS_TABLE_NAME, "event_slot"} @@ -390,12 +388,9 @@ def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: # now get the actual schema from the db _, storage_table = list(client.get_storage_tables([table_name]))[0] assert len(storage_table) > 0 - print(storage_table) # column order must match TABLE_UPDATE storage_columns = list(storage_table.values()) for c, expected_c in zip(TABLE_UPDATE, storage_columns): - # print(c["name"]) - # print(c["data_type"]) # storage columns are returned with column names as in information schema assert 
client.capabilities.casefold_identifier(c["name"]) == expected_c["name"] # athena does not know wei data type and has no JSON type, time is not supported with parquet tables @@ -444,7 +439,6 @@ def _assert_columns_order(sql_: str) -> None: col_name = client.sql_client.escape_ddl_identifier(c["name"]) else: col_name = client.sql_client.escape_column_name(c["name"]) - print(col_name) # find column names idx = sql_.find(col_name, idx) assert idx > 0, f"column {col_name} not found in script" From 3aef3fd451b42e8c7682a7951e0f8c584d90eca1 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 17 Jun 2024 22:05:44 +0200 Subject: [PATCH 072/105] adds snowflake csv support --- dlt/destinations/impl/snowflake/factory.py | 4 ++-- dlt/destinations/impl/snowflake/snowflake.py | 10 +++++++++- .../dlt-ecosystem/destinations/snowflake.md | 7 ++++++- .../docs/dlt-ecosystem/file-formats/csv.md | 2 +- tests/load/pipeline/test_postgres.py | 17 ++++++++++------- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index 73f6175624..6f06edf213 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -19,9 +19,9 @@ class snowflake(Destination[SnowflakeClientConfiguration, "SnowflakeClient"]): def _raw_capabilities(self) -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl", "parquet"] + caps.supported_loader_file_formats = ["jsonl", "parquet", "csv"] caps.preferred_staging_file_format = "jsonl" - caps.supported_staging_file_formats = ["jsonl", "parquet"] + caps.supported_staging_file_formats = ["jsonl", "parquet", "csv"] # snowflake is case sensitive but all unquoted identifiers are upper cased # so upper case identifiers are considered case insensitive caps.escape_identifier = escape_snowflake_identifier diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index f30c16643d..20fa1a2886 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -163,9 +163,17 @@ def __init__( from_clause = f"FROM {stage_file_path}" # decide on source format, stage_file_path will either be a local file or a bucket path - source_format = "( TYPE = 'JSON', BINARY_FORMAT = 'BASE64' )" + if file_name.endswith("jsonl"): + source_format = "( TYPE = 'JSON', BINARY_FORMAT = 'BASE64' )" if file_name.endswith("parquet"): source_format = "(TYPE = 'PARQUET', BINARY_AS_TEXT = FALSE, USE_LOGICAL_TYPE = TRUE)" + if file_name.endswith("csv"): + # empty strings are NULL, no data is NULL, missing columns (ERROR_ON_COLUMN_COUNT_MISMATCH) are NULL + source_format = ( + "(TYPE = 'CSV', BINARY_FORMAT = 'UTF-8', PARSE_HEADER = TRUE," + " FIELD_OPTIONALLY_ENCLOSED_BY = '\"', NULL_IF = ('')," + " ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE)" + ) with client.begin_transaction(): # PUT and COPY in one tx if local file, otherwise only copy diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md index deaaff3562..4642751011 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md +++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md @@ -124,12 +124,17 @@ The data is loaded using an internal Snowflake stage. 
We use the `PUT` command a * [insert-values](../file-formats/insert-format.md) is used by default * [parquet](../file-formats/parquet.md) is supported * [jsonl](../file-formats/jsonl.md) is supported +* [csv](../file-formats/csv.md) is supported When staging is enabled: * [jsonl](../file-formats/jsonl.md) is used by default * [parquet](../file-formats/parquet.md) is supported +* [csv](../file-formats/csv.md) is supported + +:::caution +When loading from `parquet`, Snowflake will store `complex` types (JSON) in `VARIANT` as a string. Use the `jsonl` format instead or use `PARSE_JSON` to update the `VARIANT` field after loading. +::: -> ❗ When loading from `parquet`, Snowflake will store `complex` types (JSON) in `VARIANT` as a string. Use the `jsonl` format instead or use `PARSE_JSON` to update the `VARIANT` field after loading. ## Supported column hints Snowflake supports the following [column hints](https://dlthub.com/docs/general-usage/schema#tables-and-columns): diff --git a/docs/website/docs/dlt-ecosystem/file-formats/csv.md b/docs/website/docs/dlt-ecosystem/file-formats/csv.md index 4a57a0e2d6..a57311b7dc 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/csv.md +++ b/docs/website/docs/dlt-ecosystem/file-formats/csv.md @@ -16,7 +16,7 @@ Internally we use two implementations: ## Supported Destinations -Supported by: **Postgres**, **Filesystem** +Supported by: **Postgres**, **Filesystem**, **snowflake** By setting the `loader_file_format` argument to `csv` in the run command, the pipeline will store your data in the csv format at the destination: diff --git a/tests/load/pipeline/test_postgres.py b/tests/load/pipeline/test_postgres.py index a64ee300cd..02ca7d3d3c 100644 --- a/tests/load/pipeline/test_postgres.py +++ b/tests/load/pipeline/test_postgres.py @@ -14,7 +14,7 @@ @pytest.mark.parametrize( "destination_config", - destinations_configs(default_sql_configs=True, subset=["postgres"]), + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), ids=lambda x: x.name, ) @pytest.mark.parametrize("item_type", ["object", "table"]) @@ -80,7 +80,7 @@ def test_postgres_encoded_binary( @pytest.mark.parametrize( "destination_config", - destinations_configs(default_sql_configs=True, subset=["postgres"]), + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), ids=lambda x: x.name, ) def test_postgres_empty_csv_from_arrow(destination_config: DestinationTestConfiguration) -> None: @@ -90,17 +90,20 @@ def test_postgres_empty_csv_from_arrow(destination_config: DestinationTestConfig table, _, _ = arrow_table_all_data_types("arrow-table", include_json=False) load_info = pipeline.run( - table.schema.empty_table(), table_name="table", loader_file_format="csv" + table.schema.empty_table(), table_name="arrow_table", loader_file_format="csv" ) assert_load_info(load_info) assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 1 job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path assert job.endswith("csv") - assert_data_table_counts(pipeline, {"table": 0}) + assert_data_table_counts(pipeline, {"arrow_table": 0}) with pipeline.sql_client() as client: - with client.execute_query('SELECT * FROM "table"') as cur: + with client.execute_query("SELECT * FROM arrow_table") as cur: columns = [col.name for col in cur.description] assert len(cur.fetchall()) == 0 - # all columns in order - assert columns == list(pipeline.default_schema.get_table_columns("table").keys()) + # all columns in order, also casefold to the destination casing 
(we use cursor.description) + casefold = pipeline.destination.capabilities().casefold_identifier + assert columns == list( + map(casefold, pipeline.default_schema.get_table_columns("arrow_table").keys()) + ) From 6df7a3496f259e023bb555354d5a183ddac26be9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 17 Jun 2024 22:22:51 +0200 Subject: [PATCH 073/105] creates separate csv tests --- tests/load/pipeline/test_csv_loading.py | 73 +++++++++++++++++++++++++ tests/load/pipeline/test_postgres.py | 67 +---------------------- 2 files changed, 74 insertions(+), 66 deletions(-) create mode 100644 tests/load/pipeline/test_csv_loading.py diff --git a/tests/load/pipeline/test_csv_loading.py b/tests/load/pipeline/test_csv_loading.py new file mode 100644 index 0000000000..3f83bc9d5e --- /dev/null +++ b/tests/load/pipeline/test_csv_loading.py @@ -0,0 +1,73 @@ +import os +import pytest + +from dlt.common.utils import uniq_id + +from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.cases import arrow_table_all_data_types, prepare_shuffled_tables +from tests.pipeline.utils import assert_data_table_counts, assert_load_info, load_tables_to_dicts +from tests.utils import TestDataItemFormat + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("item_type", ["object", "table"]) +def test_load_csv( + destination_config: DestinationTestConfiguration, item_type: TestDataItemFormat +) -> None: + os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + table, shuffled_table, shuffled_removed_column = prepare_shuffled_tables() + + # convert to pylist when loading from objects, this will kick the csv-reader in + if item_type == "object": + table, shuffled_table, shuffled_removed_column = ( + table.to_pylist(), + shuffled_table.to_pylist(), + shuffled_removed_column.to_pylist(), + ) + + load_info = pipeline.run( + [shuffled_removed_column, shuffled_table, table], + table_name="table", + loader_file_format="csv", + ) + assert_load_info(load_info) + job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path + assert job.endswith("csv") + assert_data_table_counts(pipeline, {"table": 5432 * 3}) + load_tables_to_dicts(pipeline, "table") + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), + ids=lambda x: x.name, +) +def test_empty_csv_from_arrow(destination_config: DestinationTestConfiguration) -> None: + os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" + os.environ["RESTORE_FROM_DESTINATION"] = "False" + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + table, _, _ = arrow_table_all_data_types("arrow-table", include_json=False) + + load_info = pipeline.run( + table.schema.empty_table(), table_name="arrow_table", loader_file_format="csv" + ) + assert_load_info(load_info) + assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 1 + job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path + assert job.endswith("csv") + assert_data_table_counts(pipeline, {"arrow_table": 0}) + with pipeline.sql_client() as client: + with client.execute_query("SELECT * FROM arrow_table") as cur: + columns = [col.name for col in cur.description] + assert len(cur.fetchall()) == 0 + + # all columns in order, 
also casefold to the destination casing (we use cursor.description) + casefold = pipeline.destination.capabilities().casefold_identifier + assert columns == list( + map(casefold, pipeline.default_schema.get_table_columns("arrow_table").keys()) + ) diff --git a/tests/load/pipeline/test_postgres.py b/tests/load/pipeline/test_postgres.py index 02ca7d3d3c..045aadfe98 100644 --- a/tests/load/pipeline/test_postgres.py +++ b/tests/load/pipeline/test_postgres.py @@ -7,44 +7,10 @@ from dlt.common.utils import uniq_id from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -from tests.cases import arrow_table_all_data_types, prepare_shuffled_tables -from tests.pipeline.utils import assert_data_table_counts, assert_load_info, load_tables_to_dicts +from tests.pipeline.utils import assert_load_info, load_tables_to_dicts from tests.utils import TestDataItemFormat -@pytest.mark.parametrize( - "destination_config", - destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), - ids=lambda x: x.name, -) -@pytest.mark.parametrize("item_type", ["object", "table"]) -def test_postgres_load_csv( - destination_config: DestinationTestConfiguration, item_type: TestDataItemFormat -) -> None: - os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) - table, shuffled_table, shuffled_removed_column = prepare_shuffled_tables() - - # convert to pylist when loading from objects, this will kick the csv-reader in - if item_type == "object": - table, shuffled_table, shuffled_removed_column = ( - table.to_pylist(), - shuffled_table.to_pylist(), - shuffled_removed_column.to_pylist(), - ) - - load_info = pipeline.run( - [shuffled_removed_column, shuffled_table, table], - table_name="table", - loader_file_format="csv", - ) - assert_load_info(load_info) - job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path - assert job.endswith("csv") - assert_data_table_counts(pipeline, {"table": 5432 * 3}) - load_tables_to_dicts(pipeline, "table") - - @pytest.mark.parametrize( "destination_config", destinations_configs(default_sql_configs=True, subset=["postgres"]), @@ -76,34 +42,3 @@ def test_postgres_encoded_binary( # print(bytes(data["table"][0]["hash"])) # data in postgres equals unencoded blob assert data["table"][0]["hash"].tobytes() == blob - - -@pytest.mark.parametrize( - "destination_config", - destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), - ids=lambda x: x.name, -) -def test_postgres_empty_csv_from_arrow(destination_config: DestinationTestConfiguration) -> None: - os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" - os.environ["RESTORE_FROM_DESTINATION"] = "False" - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) - table, _, _ = arrow_table_all_data_types("arrow-table", include_json=False) - - load_info = pipeline.run( - table.schema.empty_table(), table_name="arrow_table", loader_file_format="csv" - ) - assert_load_info(load_info) - assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 1 - job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path - assert job.endswith("csv") - assert_data_table_counts(pipeline, {"arrow_table": 0}) - with pipeline.sql_client() as client: - with client.execute_query("SELECT * FROM arrow_table") as cur: - columns = [col.name for col in cur.description] - assert len(cur.fetchall()) == 0 - - # all columns in order, also casefold to the 
destination casing (we use cursor.description) - casefold = pipeline.destination.capabilities().casefold_identifier - assert columns == list( - map(casefold, pipeline.default_schema.get_table_columns("arrow_table").keys()) - ) From 57aec2e518e225605b93133391d9b69acb16f59f Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 19 Jun 2024 12:52:53 +0200 Subject: [PATCH 074/105] allows to import files into extract storage, adds import file writer and spec --- dlt/common/data_writers/__init__.py | 2 ++ dlt/common/data_writers/buffered.py | 26 ++++++++++++-- dlt/common/data_writers/writers.py | 36 +++++++++++++++++-- dlt/common/storages/data_item_storage.py | 5 ++- .../data_writers/test_buffered_writer.py | 21 +++++++++++ 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/dlt/common/data_writers/__init__.py b/dlt/common/data_writers/__init__.py index e412b61a09..945e74a37b 100644 --- a/dlt/common/data_writers/__init__.py +++ b/dlt/common/data_writers/__init__.py @@ -3,6 +3,7 @@ DataWriterMetrics, TDataItemFormat, FileWriterSpec, + create_import_spec, resolve_best_writer_spec, get_best_writer_spec, is_native_writer, @@ -17,6 +18,7 @@ __all__ = [ "DataWriter", "FileWriterSpec", + "create_import_spec", "resolve_best_writer_spec", "get_best_writer_spec", "is_native_writer", diff --git a/dlt/common/data_writers/buffered.py b/dlt/common/data_writers/buffered.py index bd32c68c49..bd69ce7c0b 100644 --- a/dlt/common/data_writers/buffered.py +++ b/dlt/common/data_writers/buffered.py @@ -1,6 +1,7 @@ import gzip import time -from typing import ClassVar, List, IO, Any, Optional, Type, Generic +import contextlib +from typing import ClassVar, Iterator, List, IO, Any, Optional, Type, Generic from dlt.common.typing import TDataItem, TDataItems from dlt.common.data_writers.exceptions import ( @@ -138,17 +139,26 @@ def write_empty_file(self, columns: TTableSchemaColumns) -> DataWriterMetrics: self._last_modified = time.time() return self._rotate_file(allow_empty_file=True) - def import_file(self, file_path: str, metrics: DataWriterMetrics) -> DataWriterMetrics: + def import_file( + self, file_path: str, metrics: DataWriterMetrics, with_extension: str = None + ) -> DataWriterMetrics: """Import a file from `file_path` into items storage under a new file name. Does not check the imported file format. Uses counts from `metrics` as a base. Logically closes the imported file The preferred import method is a hard link to avoid copying the data. If current filesystem does not support it, a regular copy is used. + + Alternative extension may be provided via `with_extension` so various file formats may be imported into the same folder. """ # TODO: we should separate file storage from other storages. this creates circular deps from dlt.common.storages import FileStorage - self._rotate_file() + # import file with alternative extension + spec = self.writer_spec + if with_extension: + spec = self.writer_spec._replace(file_extension=with_extension) + with self.alternative_spec(spec): + self._rotate_file() FileStorage.link_hard_with_fallback(file_path, self._file_name) self._last_modified = time.time() metrics = metrics._replace( @@ -176,6 +186,16 @@ def close(self, skip_flush: bool = False) -> None: def closed(self) -> bool: return self._closed + @contextlib.contextmanager + def alternative_spec(self, spec: FileWriterSpec) -> Iterator[FileWriterSpec]: + """Temporarily changes the writer spec ie. 
for the moment file is rotated""" + old_spec = self.writer_spec + try: + self.writer_spec = spec + yield spec + finally: + self.writer_spec = old_spec + def __enter__(self) -> "BufferedDataWriter[TWriter]": return self diff --git a/dlt/common/data_writers/writers.py b/dlt/common/data_writers/writers.py index 8936dae605..059cb6d88d 100644 --- a/dlt/common/data_writers/writers.py +++ b/dlt/common/data_writers/writers.py @@ -26,7 +26,11 @@ FileSpecNotFound, InvalidDataItem, ) -from dlt.common.destination import DestinationCapabilitiesContext, TLoaderFileFormat +from dlt.common.destination import ( + DestinationCapabilitiesContext, + TLoaderFileFormat, + ALL_SUPPORTED_FILE_FORMATS, +) from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.typing import StrAny @@ -34,7 +38,7 @@ from dlt.common.libs.pyarrow import pyarrow as pa -TDataItemFormat = Literal["arrow", "object"] +TDataItemFormat = Literal["arrow", "object", "file"] TWriter = TypeVar("TWriter", bound="DataWriter") @@ -124,6 +128,9 @@ def item_format_from_file_extension(cls, extension: str) -> TDataItemFormat: return "object" elif extension == "parquet": return "arrow" + # those files may be imported by normalizer as is + elif extension in ALL_SUPPORTED_FILE_FORMATS: + return "file" else: raise ValueError(f"Cannot figure out data item format for extension {extension}") @@ -132,6 +139,8 @@ def writer_class_from_spec(spec: FileWriterSpec) -> Type["DataWriter"]: try: return WRITER_SPECS[spec] except KeyError: + if spec.data_item_format == "file": + return ImportFileWriter raise FileSpecNotFound(spec.file_format, spec.data_item_format, spec) @staticmethod @@ -147,6 +156,15 @@ def class_factory( raise FileFormatForItemFormatNotFound(file_format, data_item_format) +class ImportFileWriter(DataWriter): + """May only import files, fails on any open/write operations""" + + def write_header(self, columns_schema: TTableSchemaColumns) -> None: + raise NotImplementedError( + "ImportFileWriter cannot write any files. You have bug in your code." + ) + + class JsonlWriter(DataWriter): def write_data(self, rows: Sequence[Any]) -> None: super().write_data(rows) @@ -389,6 +407,7 @@ class CsvDataWriterConfiguration(BaseConfiguration): delimiter: str = "," include_header: bool = True quoting: CsvQuoting = "quote_needed" + on_error_continue: bool = False __section__: ClassVar[str] = known_sections.DATA_WRITER @@ -783,3 +802,16 @@ def get_best_writer_spec( return DataWriter.class_factory(file_format, item_format, native_writers).writer_spec() except DataWriterNotFound: return DataWriter.class_factory(file_format, item_format, ALL_WRITERS).writer_spec() + + +def create_import_spec( + item_file_format: TLoaderFileFormat, + possible_file_formats: Sequence[TLoaderFileFormat], +) -> FileWriterSpec: + """Creates writer spec that may be used only to import files""" + # can the item file be directly imported? 
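+    # e.g. an extracted "csv" file can only be linked into the load package as-is when
+    # "csv" is among the `possible_file_formats` supported by the destination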
+ if item_file_format not in possible_file_formats: + raise SpecLookupFailed("file", possible_file_formats, item_file_format) + + spec = DataWriter.class_factory(item_file_format, "object", ALL_WRITERS).writer_spec() + return spec._replace(data_item_format="file") diff --git a/dlt/common/storages/data_item_storage.py b/dlt/common/storages/data_item_storage.py index f6072c0260..29a9da8acf 100644 --- a/dlt/common/storages/data_item_storage.py +++ b/dlt/common/storages/data_item_storage.py @@ -60,15 +60,18 @@ def import_items_file( table_name: str, file_path: str, metrics: DataWriterMetrics, + with_extension: str = None, ) -> DataWriterMetrics: """Import a file from `file_path` into items storage under a new file name. Does not check the imported file format. Uses counts from `metrics` as a base. Logically closes the imported file The preferred import method is a hard link to avoid copying the data. If current filesystem does not support it, a regular copy is used. + + Alternative extension may be provided via `with_extension` so various file formats may be imported into the same folder. """ writer = self._get_writer(load_id, schema_name, table_name) - return writer.import_file(file_path, metrics) + return writer.import_file(file_path, metrics, with_extension) def close_writers(self, load_id: str, skip_flush: bool = False) -> None: """Flush, write footers (skip_flush), write metrics and close files in all diff --git a/tests/extract/data_writers/test_buffered_writer.py b/tests/extract/data_writers/test_buffered_writer.py index b6da132de9..5cad5a35b9 100644 --- a/tests/extract/data_writers/test_buffered_writer.py +++ b/tests/extract/data_writers/test_buffered_writer.py @@ -264,6 +264,27 @@ def test_import_file(writer_type: Type[DataWriter]) -> None: assert metrics.file_size == 231 +@pytest.mark.parametrize("writer_type", ALL_WRITERS) +def test_import_file_with_extension(writer_type: Type[DataWriter]) -> None: + now = time.time() + with get_writer(writer_type) as writer: + # won't destroy the original + metrics = writer.import_file( + "tests/extract/cases/imported.any", + DataWriterMetrics("", 1, 231, 0, 0), + with_extension="any", + ) + assert len(writer.closed_files) == 1 + assert os.path.isfile(metrics.file_path) + # extension is correctly set + assert metrics.file_path.endswith(".any") + assert writer.closed_files[0] == metrics + assert metrics.created <= metrics.last_modified + assert metrics.created >= now + assert metrics.items_count == 1 + assert metrics.file_size == 231 + + @pytest.mark.parametrize( "disable_compression", [True, False], ids=["no_compression", "compression"] ) From fee7af53a63fd81ca14e12d3403fc08e7a8ae252 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 19 Jun 2024 12:53:22 +0200 Subject: [PATCH 075/105] handles ImportFileMeta in extractor --- dlt/extract/extractors.py | 53 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index bff035f001..4f88f8c645 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -4,6 +4,7 @@ from dlt.common import logger from dlt.common.configuration.inject import with_config from dlt.common.configuration.specs import BaseConfiguration, configspec +from dlt.common.data_writers import DataWriterMetrics from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.exceptions import MissingDependencyException from dlt.common.runtime.collector import Collector, NULL_COLLECTOR @@ -16,7 
+17,7 @@ TTableSchemaColumns, TPartialTableSchema, ) -from dlt.extract.hints import HintsMeta +from dlt.extract.hints import HintsMeta, TResourceHints from dlt.extract.resource import DltResource from dlt.extract.items import TableNameMeta from dlt.extract.storage import ExtractorItemStorage @@ -45,6 +46,23 @@ def materialize_schema_item() -> MaterializedEmptyList: return MaterializedEmptyList() +class ImportFileMeta(HintsMeta): + __slots__ = ("file_path", "metrics", "with_extension") + + def __init__( + self, + file_path: str, + metrics: DataWriterMetrics, + with_extension: str = None, + hints: TResourceHints = None, + create_table_variant: bool = None, + ) -> None: + super().__init__(hints, create_table_variant) + self.file_path = file_path + self.metrics = metrics + self.with_extension = with_extension + + class Extractor: @configspec class ExtractorConfiguration(BaseConfiguration): @@ -76,7 +94,7 @@ def __init__( def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> None: """Write `items` to `resource` optionally computing table schemas and revalidating/filtering data""" - if isinstance(meta, HintsMeta): + if isinstance(meta, HintsMeta) and meta.hints: # update the resource with new hints, remove all caches so schema is recomputed # and contracts re-applied resource.merge_hints(meta.hints, meta.create_table_variant) @@ -91,7 +109,7 @@ def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> No self._write_to_static_table(resource, table_name, items, meta) else: # table has name or other hints depending on data items - self._write_to_dynamic_table(resource, items) + self._write_to_dynamic_table(resource, items, meta) def write_empty_items_file(self, table_name: str) -> None: table_name = self.naming.normalize_table_identifier(table_name) @@ -127,7 +145,24 @@ def _write_item( if isinstance(items, MaterializedEmptyList): self.resources_with_empty.add(resource_name) - def _write_to_dynamic_table(self, resource: DltResource, items: TDataItems) -> None: + def _import_item( + self, + table_name: str, + resource_name: str, + meta: ImportFileMeta, + ) -> None: + metrics = self.item_storage.import_items_file( + self.load_id, + self.schema.name, + table_name, + meta.file_path, + meta.metrics, + meta.with_extension, + ) + self.collector.update(table_name, inc=metrics.items_count) + self.resources_with_items.add(resource_name) + + def _write_to_dynamic_table(self, resource: DltResource, items: TDataItems, meta: Any) -> None: if not isinstance(items, list): items = [items] @@ -141,7 +176,10 @@ def _write_to_dynamic_table(self, resource: DltResource, items: TDataItems) -> N ) # write to storage with inferred table name if table_name not in self._filtered_tables: - self._write_item(table_name, resource.name, item) + if isinstance(meta, ImportFileMeta): + self._import_item(table_name, resource.name, meta) + else: + self._write_item(table_name, resource.name, items) def _write_to_static_table( self, resource: DltResource, table_name: str, items: TDataItems, meta: Any @@ -149,7 +187,10 @@ def _write_to_static_table( if table_name not in self._table_contracts: items = self._compute_and_update_table(resource, table_name, items, meta) if table_name not in self._filtered_tables: - self._write_item(table_name, resource.name, items) + if isinstance(meta, ImportFileMeta): + self._import_item(table_name, resource.name, meta) + else: + self._write_item(table_name, resource.name, items) def _compute_table(self, resource: DltResource, items: TDataItems, meta: Any) -> 
TTableSchema: """Computes a schema for a new or dynamic table and normalizes identifiers""" From 96c7222e756afb927a91d0d1d3e76b1c59d084f0 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 19 Jun 2024 12:54:18 +0200 Subject: [PATCH 076/105] adds import file item normalizer and router to normalize --- dlt/normalize/items_normalizers.py | 22 +++ dlt/normalize/normalize.py | 231 +-------------------------- dlt/normalize/worker.py | 240 +++++++++++++++++++++++++++++ tests/normalize/test_normalize.py | 19 +-- 4 files changed, 280 insertions(+), 232 deletions(-) create mode 100644 dlt/normalize/worker.py diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index d5d037ebf3..e4408a83d7 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -407,3 +407,25 @@ def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSch ) return base_schema_update + + +class FileImportNormalizer(ItemsNormalizer): + def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSchemaUpdate]: + logger.info( + f"Table {root_table_name} {self.item_storage.writer_spec.file_format} file" + f" {extracted_items_file} will be directly imported without normalization" + ) + with self.normalize_storage.extracted_packages.storage.open_file( + extracted_items_file, "rb" + ) as f: + # TODO: sniff the schema depending on a file type + file_metrics = DataWriterMetrics(extracted_items_file, 0, f.tell(), 0, 0) + parts = ParsedLoadJobFileName.parse(extracted_items_file) + self.item_storage.import_items_file( + self.load_id, + self.schema.name, + parts.table_name, + self.normalize_storage.extracted_packages.storage.make_full_path(extracted_items_file), + file_metrics, + ) + return [] diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index 8b54e210fa..98154cd5cf 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -1,33 +1,23 @@ import os import itertools -from typing import Callable, List, Dict, NamedTuple, Sequence, Tuple, Set, Optional +from typing import List, Dict, Sequence, Optional, Callable from concurrent.futures import Future, Executor from dlt.common import logger from dlt.common.runtime.signals import sleep from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config -from dlt.common.configuration.container import Container -from dlt.common.data_writers import ( - DataWriter, - DataWriterMetrics, - TDataItemFormat, - resolve_best_writer_spec, - get_best_writer_spec, - is_native_writer, -) +from dlt.common.data_writers import DataWriterMetrics from dlt.common.data_writers.writers import EMPTY_DATA_WRITER_METRICS from dlt.common.runners import TRunMetrics, Runnable, NullExecutor from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR -from dlt.common.schema.typing import TStoredSchema, TTableSchema +from dlt.common.schema.typing import TStoredSchema from dlt.common.schema.utils import merge_schema_updates from dlt.common.storages import ( NormalizeStorage, SchemaStorage, LoadStorage, - LoadStorageConfiguration, - NormalizeStorageConfiguration, ParsedLoadJobFileName, ) from dlt.common.schema import TSchemaUpdate, Schema @@ -40,20 +30,10 @@ ) from dlt.common.storages.exceptions import LoadPackageNotFound from dlt.common.storages.load_package import LoadPackageInfo -from dlt.common.utils import chunks from dlt.normalize.configuration import NormalizeConfiguration from 
dlt.normalize.exceptions import NormalizeJobFailed -from dlt.normalize.items_normalizers import ( - ArrowItemsNormalizer, - JsonLItemsNormalizer, - ItemsNormalizer, -) - - -class TWorkerRV(NamedTuple): - schema_updates: List[TSchemaUpdate] - file_metrics: List[DataWriterMetrics] +from dlt.normalize.worker import w_normalize_files, group_worker_files, TWorkerRV # normalize worker wrapping function signature @@ -99,181 +79,6 @@ def create_storages(self) -> None: config=self.config._load_storage_config, ) - @staticmethod - def w_normalize_files( - config: NormalizeConfiguration, - normalize_storage_config: NormalizeStorageConfiguration, - loader_storage_config: LoadStorageConfiguration, - stored_schema: TStoredSchema, - load_id: str, - extracted_items_files: Sequence[str], - ) -> TWorkerRV: - destination_caps = config.destination_capabilities - schema_updates: List[TSchemaUpdate] = [] - # normalizers are cached per table name - item_normalizers: Dict[str, ItemsNormalizer] = {} - - preferred_file_format = ( - destination_caps.preferred_loader_file_format - or destination_caps.preferred_staging_file_format - ) - # TODO: capabilities.supported_*_formats can be None, it should have defaults - supported_file_formats = destination_caps.supported_loader_file_formats or [] - supported_table_formats = destination_caps.supported_table_formats or [] - - # process all files with data items and write to buffered item storage - with Container().injectable_context(destination_caps): - schema = Schema.from_stored_schema(stored_schema) - normalize_storage = NormalizeStorage(False, normalize_storage_config) - load_storage = LoadStorage(False, supported_file_formats, loader_storage_config) - - def _get_items_normalizer( - item_format: TDataItemFormat, table_schema: Optional[TTableSchema] - ) -> ItemsNormalizer: - table_name = table_schema["name"] - if table_name in item_normalizers: - return item_normalizers[table_name] - - if ( - "table_format" in table_schema - and table_schema["table_format"] not in supported_table_formats - ): - logger.warning( - "Destination does not support the configured `table_format` value " - f"`{table_schema['table_format']}` for table `{table_schema['name']}`. " - "The setting will probably be ignored." - ) - - items_preferred_file_format = preferred_file_format - items_supported_file_formats = supported_file_formats - if destination_caps.loader_file_format_adapter is not None: - items_preferred_file_format, items_supported_file_formats = ( - destination_caps.loader_file_format_adapter( - preferred_file_format, - ( - supported_file_formats.copy() - if isinstance(supported_file_formats, list) - else supported_file_formats - ), - table_schema=table_schema, - ) - ) - - # force file format - best_writer_spec = None - if config.loader_file_format: - if config.loader_file_format in items_supported_file_formats: - # TODO: pass supported_file_formats, when used in pipeline we already checked that - # but if normalize is used standalone `supported_loader_file_formats` may be unresolved - best_writer_spec = get_best_writer_spec( - item_format, config.loader_file_format - ) - else: - logger.warning( - f"The configured value `{config.loader_file_format}` " - "for `loader_file_format` is not supported for table " - f"`{table_schema['name']}` and will be ignored. Dlt " - "will use a supported format instead." 
- ) - - if best_writer_spec is None: - # find best spec among possible formats taking into account destination preference - best_writer_spec = resolve_best_writer_spec( - item_format, items_supported_file_formats, items_preferred_file_format - ) - # if best_writer_spec.file_format != preferred_file_format: - # logger.warning( - # f"For data items yielded as {item_format} jobs in file format" - # f" {preferred_file_format} cannot be created." - # f" {best_writer_spec.file_format} jobs will be used instead." - # " This may decrease the performance." - # ) - item_storage = load_storage.create_item_storage(best_writer_spec) - if not is_native_writer(item_storage.writer_cls): - logger.warning( - f"For data items yielded as {item_format} and job file format" - f" {best_writer_spec.file_format} native writer could not be found. A" - f" {item_storage.writer_cls.__name__} writer is used that internally" - f" converts {item_format}. This will degrade performance." - ) - cls = ArrowItemsNormalizer if item_format == "arrow" else JsonLItemsNormalizer - logger.info( - f"Created items normalizer {cls.__name__} with writer" - f" {item_storage.writer_cls.__name__} for item format {item_format} and file" - f" format {item_storage.writer_spec.file_format}" - ) - norm = item_normalizers[table_name] = cls( - item_storage, - normalize_storage, - schema, - load_id, - config, - ) - return norm - - def _gather_metrics_and_close( - parsed_fn: ParsedLoadJobFileName, in_exception: bool - ) -> List[DataWriterMetrics]: - writer_metrics: List[DataWriterMetrics] = [] - try: - try: - for normalizer in item_normalizers.values(): - normalizer.item_storage.close_writers(load_id, skip_flush=in_exception) - except Exception: - # if we had exception during flushing the writers, close them without flushing - if not in_exception: - for normalizer in item_normalizers.values(): - normalizer.item_storage.close_writers(load_id, skip_flush=True) - raise - finally: - # always gather metrics - for normalizer in item_normalizers.values(): - norm_metrics = normalizer.item_storage.closed_files(load_id) - writer_metrics.extend(norm_metrics) - for normalizer in item_normalizers.values(): - normalizer.item_storage.remove_closed_files(load_id) - except Exception as exc: - if in_exception: - # swallow exception if we already handle exceptions - return writer_metrics - else: - # enclose the exception during the closing in job failed exception - job_id = parsed_fn.job_id() if parsed_fn else "" - raise NormalizeJobFailed(load_id, job_id, str(exc), writer_metrics) - return writer_metrics - - parsed_file_name: ParsedLoadJobFileName = None - try: - root_tables: Set[str] = set() - for extracted_items_file in extracted_items_files: - parsed_file_name = ParsedLoadJobFileName.parse(extracted_items_file) - # normalize table name in case the normalization changed - # NOTE: this is the best we can do, until a full lineage information is in the schema - root_table_name = schema.naming.normalize_table_identifier( - parsed_file_name.table_name - ) - root_tables.add(root_table_name) - normalizer = _get_items_normalizer( - DataWriter.item_format_from_file_extension(parsed_file_name.file_format), - stored_schema["tables"].get(root_table_name, {"name": root_table_name}), - ) - logger.debug( - f"Processing extracted items in {extracted_items_file} in load_id" - f" {load_id} with table name {root_table_name} and schema {schema.name}" - ) - partial_updates = normalizer(extracted_items_file, root_table_name) - schema_updates.extend(partial_updates) - 
logger.debug(f"Processed file {extracted_items_file}") - except Exception as exc: - job_id = parsed_file_name.job_id() if parsed_file_name else "" - writer_metrics = _gather_metrics_and_close(parsed_file_name, in_exception=True) - raise NormalizeJobFailed(load_id, job_id, str(exc), writer_metrics) from exc - else: - writer_metrics = _gather_metrics_and_close(parsed_file_name, in_exception=False) - - logger.info(f"Processed all items in {len(extracted_items_files)} files") - return TWorkerRV(schema_updates, writer_metrics) - def update_schema(self, schema: Schema, schema_updates: List[TSchemaUpdate]) -> None: for schema_update in schema_updates: for table_name, table_updates in schema_update.items(): @@ -284,26 +89,9 @@ def update_schema(self, schema: Schema, schema_updates: List[TSchemaUpdate]) -> # merge columns where we expect identifiers to be normalized schema.update_table(partial_table, normalize_identifiers=False) - @staticmethod - def group_worker_files(files: Sequence[str], no_groups: int) -> List[Sequence[str]]: - # sort files so the same tables are in the same worker - files = list(sorted(files)) - - chunk_size = max(len(files) // no_groups, 1) - chunk_files = list(chunks(files, chunk_size)) - # distribute the remainder files to existing groups starting from the end - remainder_l = len(chunk_files) - no_groups - l_idx = 0 - while remainder_l > 0: - for idx, file in enumerate(reversed(chunk_files.pop())): - chunk_files[-l_idx - idx - remainder_l].append(file) # type: ignore - remainder_l -= 1 - l_idx = idx + 1 - return chunk_files - def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWorkerRV: workers: int = getattr(self.pool, "_max_workers", 1) - chunk_files = self.group_worker_files(files, workers) + chunk_files = group_worker_files(files, workers) schema_dict: TStoredSchema = schema.to_dict() param_chunk = [ ( @@ -319,10 +107,7 @@ def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TW # return stats summary = TWorkerRV([], []) # push all tasks to queue - tasks = [ - (self.pool.submit(Normalize.w_normalize_files, *params), params) - for params in param_chunk - ] + tasks = [(self.pool.submit(w_normalize_files, *params), params) for params in param_chunk] while len(tasks) > 0: sleep(0.3) @@ -358,7 +143,7 @@ def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TW # TODO: it's time for a named tuple params = params[:3] + (schema_dict,) + params[4:] retry_pending: Future[TWorkerRV] = self.pool.submit( - Normalize.w_normalize_files, *params + w_normalize_files, *params ) tasks.append((retry_pending, params)) # remove finished tasks @@ -368,7 +153,7 @@ def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TW return summary def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWorkerRV: - result = Normalize.w_normalize_files( + result = w_normalize_files( self.config, self.normalize_storage.config, self.load_storage.config, diff --git a/dlt/normalize/worker.py b/dlt/normalize/worker.py new file mode 100644 index 0000000000..1338dac984 --- /dev/null +++ b/dlt/normalize/worker.py @@ -0,0 +1,240 @@ +from typing import Callable, List, Dict, NamedTuple, Sequence, Set, Optional, Type + +from dlt.common import logger +from dlt.common.configuration.container import Container +from dlt.common.data_writers import ( + DataWriter, + DataWriterMetrics, + create_import_spec, + resolve_best_writer_spec, + get_best_writer_spec, + is_native_writer, +) +from dlt.common.utils import 
chunks +from dlt.common.schema.typing import TStoredSchema, TTableSchema +from dlt.common.storages import ( + NormalizeStorage, + LoadStorage, + LoadStorageConfiguration, + NormalizeStorageConfiguration, + ParsedLoadJobFileName, +) +from dlt.common.schema import TSchemaUpdate, Schema + +from dlt.normalize.configuration import NormalizeConfiguration +from dlt.normalize.exceptions import NormalizeJobFailed +from dlt.normalize.items_normalizers import ( + ArrowItemsNormalizer, + FileImportNormalizer, + JsonLItemsNormalizer, + ItemsNormalizer, +) + + +class TWorkerRV(NamedTuple): + schema_updates: List[TSchemaUpdate] + file_metrics: List[DataWriterMetrics] + + +def group_worker_files(files: Sequence[str], no_groups: int) -> List[Sequence[str]]: + # sort files so the same tables are in the same worker + files = list(sorted(files)) + + chunk_size = max(len(files) // no_groups, 1) + chunk_files = list(chunks(files, chunk_size)) + # distribute the remainder files to existing groups starting from the end + remainder_l = len(chunk_files) - no_groups + l_idx = 0 + while remainder_l > 0: + for idx, file in enumerate(reversed(chunk_files.pop())): + chunk_files[-l_idx - idx - remainder_l].append(file) # type: ignore + remainder_l -= 1 + l_idx = idx + 1 + return chunk_files + + +def w_normalize_files( + config: NormalizeConfiguration, + normalize_storage_config: NormalizeStorageConfiguration, + loader_storage_config: LoadStorageConfiguration, + stored_schema: TStoredSchema, + load_id: str, + extracted_items_files: Sequence[str], +) -> TWorkerRV: + destination_caps = config.destination_capabilities + schema_updates: List[TSchemaUpdate] = [] + # normalizers are cached per table name + item_normalizers: Dict[str, ItemsNormalizer] = {} + + preferred_file_format = ( + destination_caps.preferred_loader_file_format + or destination_caps.preferred_staging_file_format + ) + # TODO: capabilities.supported_*_formats can be None, it should have defaults + supported_file_formats = destination_caps.supported_loader_file_formats or [] + supported_table_formats = destination_caps.supported_table_formats or [] + + # process all files with data items and write to buffered item storage + with Container().injectable_context(destination_caps): + schema = Schema.from_stored_schema(stored_schema) + normalize_storage = NormalizeStorage(False, normalize_storage_config) + load_storage = LoadStorage(False, supported_file_formats, loader_storage_config) + + def _get_items_normalizer( + parsed_file_name: ParsedLoadJobFileName, table_schema: Optional[TTableSchema] + ) -> ItemsNormalizer: + item_format = DataWriter.item_format_from_file_extension(parsed_file_name.file_format) + + table_name = table_schema["name"] + if table_name in item_normalizers: + return item_normalizers[table_name] + + if ( + "table_format" in table_schema + and table_schema["table_format"] not in supported_table_formats + ): + logger.warning( + "Destination does not support the configured `table_format` value " + f"`{table_schema['table_format']}` for table `{table_schema['name']}`. " + "The setting will probably be ignored." 
+ ) + + items_preferred_file_format = preferred_file_format + items_supported_file_formats = supported_file_formats + if destination_caps.loader_file_format_adapter is not None: + items_preferred_file_format, items_supported_file_formats = ( + destination_caps.loader_file_format_adapter( + preferred_file_format, + ( + supported_file_formats.copy() + if isinstance(supported_file_formats, list) + else supported_file_formats + ), + table_schema=table_schema, + ) + ) + + best_writer_spec = None + if item_format == "file": + # if we want to import file, create a spec that may be used only for importing + best_writer_spec = create_import_spec( + parsed_file_name.file_format, items_supported_file_formats # type: ignore[arg-type] + ) + + if config.loader_file_format and best_writer_spec is None: + # force file format + if config.loader_file_format in items_supported_file_formats: + # TODO: pass supported_file_formats, when used in pipeline we already checked that + # but if normalize is used standalone `supported_loader_file_formats` may be unresolved + best_writer_spec = get_best_writer_spec(item_format, config.loader_file_format) + else: + logger.warning( + f"The configured value `{config.loader_file_format}` " + "for `loader_file_format` is not supported for table " + f"`{table_schema['name']}` and will be ignored. Dlt " + "will use a supported format instead." + ) + + if best_writer_spec is None: + # find best spec among possible formats taking into account destination preference + best_writer_spec = resolve_best_writer_spec( + item_format, items_supported_file_formats, items_preferred_file_format + ) + # if best_writer_spec.file_format != preferred_file_format: + # logger.warning( + # f"For data items yielded as {item_format} jobs in file format" + # f" {preferred_file_format} cannot be created." + # f" {best_writer_spec.file_format} jobs will be used instead." + # " This may decrease the performance." + # ) + item_storage = load_storage.create_item_storage(best_writer_spec) + if not is_native_writer(item_storage.writer_cls): + logger.warning( + f"For data items yielded as {item_format} and job file format" + f" {best_writer_spec.file_format} native writer could not be found. A" + f" {item_storage.writer_cls.__name__} writer is used that internally" + f" converts {item_format}. This will degrade performance." 
+ ) + cls: Type[ItemsNormalizer] + if item_format == "arrow": + cls = ArrowItemsNormalizer + elif item_format == "object": + cls = JsonLItemsNormalizer + else: + cls = FileImportNormalizer + logger.info( + f"Created items normalizer {cls.__name__} with writer" + f" {item_storage.writer_cls.__name__} for item format {item_format} and file" + f" format {item_storage.writer_spec.file_format}" + ) + norm = item_normalizers[table_name] = cls( + item_storage, + normalize_storage, + schema, + load_id, + config, + ) + return norm + + def _gather_metrics_and_close( + parsed_fn: ParsedLoadJobFileName, in_exception: bool + ) -> List[DataWriterMetrics]: + writer_metrics: List[DataWriterMetrics] = [] + try: + try: + for normalizer in item_normalizers.values(): + normalizer.item_storage.close_writers(load_id, skip_flush=in_exception) + except Exception: + # if we had exception during flushing the writers, close them without flushing + if not in_exception: + for normalizer in item_normalizers.values(): + normalizer.item_storage.close_writers(load_id, skip_flush=True) + raise + finally: + # always gather metrics + for normalizer in item_normalizers.values(): + norm_metrics = normalizer.item_storage.closed_files(load_id) + writer_metrics.extend(norm_metrics) + for normalizer in item_normalizers.values(): + normalizer.item_storage.remove_closed_files(load_id) + except Exception as exc: + if in_exception: + # swallow exception if we already handle exceptions + return writer_metrics + else: + # enclose the exception during the closing in job failed exception + job_id = parsed_fn.job_id() if parsed_fn else "" + raise NormalizeJobFailed(load_id, job_id, str(exc), writer_metrics) + return writer_metrics + + parsed_file_name: ParsedLoadJobFileName = None + try: + root_tables: Set[str] = set() + for extracted_items_file in extracted_items_files: + parsed_file_name = ParsedLoadJobFileName.parse(extracted_items_file) + # normalize table name in case the normalization changed + # NOTE: this is the best we can do, until a full lineage information is in the schema + root_table_name = schema.naming.normalize_table_identifier( + parsed_file_name.table_name + ) + root_tables.add(root_table_name) + normalizer = _get_items_normalizer( + parsed_file_name, + stored_schema["tables"].get(root_table_name, {"name": root_table_name}), + ) + logger.debug( + f"Processing extracted items in {extracted_items_file} in load_id" + f" {load_id} with table name {root_table_name} and schema {schema.name}" + ) + partial_updates = normalizer(extracted_items_file, root_table_name) + schema_updates.extend(partial_updates) + logger.debug(f"Processed file {extracted_items_file}") + except Exception as exc: + job_id = parsed_file_name.job_id() if parsed_file_name else "" + writer_metrics = _gather_metrics_and_close(parsed_file_name, in_exception=True) + raise NormalizeJobFailed(load_id, job_id, str(exc), writer_metrics) from exc + else: + writer_metrics = _gather_metrics_and_close(parsed_file_name, in_exception=False) + + logger.info(f"Processed all items in {len(extracted_items_files)} files") + return TWorkerRV(schema_updates, writer_metrics) diff --git a/tests/normalize/test_normalize.py b/tests/normalize/test_normalize.py index 2c130f886b..7463184be7 100644 --- a/tests/normalize/test_normalize.py +++ b/tests/normalize/test_normalize.py @@ -16,6 +16,7 @@ from dlt.extract.extract import ExtractStorage from dlt.normalize import Normalize +from dlt.normalize.worker import group_worker_files from dlt.normalize.exceptions import NormalizeJobFailed 
from tests.cases import JSON_TYPED_DICT, JSON_TYPED_DICT_TYPES @@ -510,28 +511,28 @@ def test_collect_metrics_on_exception(raw_normalize: Normalize) -> None: def test_group_worker_files() -> None: files = ["f%03d" % idx for idx in range(0, 100)] - assert Normalize.group_worker_files([], 4) == [] - assert Normalize.group_worker_files(["f001"], 1) == [["f001"]] - assert Normalize.group_worker_files(["f001"], 100) == [["f001"]] - assert Normalize.group_worker_files(files[:4], 4) == [["f000"], ["f001"], ["f002"], ["f003"]] - assert Normalize.group_worker_files(files[:5], 4) == [ + assert group_worker_files([], 4) == [] + assert group_worker_files(["f001"], 1) == [["f001"]] + assert group_worker_files(["f001"], 100) == [["f001"]] + assert group_worker_files(files[:4], 4) == [["f000"], ["f001"], ["f002"], ["f003"]] + assert group_worker_files(files[:5], 4) == [ ["f000"], ["f001"], ["f002"], ["f003", "f004"], ] - assert Normalize.group_worker_files(files[:8], 4) == [ + assert group_worker_files(files[:8], 4) == [ ["f000", "f001"], ["f002", "f003"], ["f004", "f005"], ["f006", "f007"], ] - assert Normalize.group_worker_files(files[:8], 3) == [ + assert group_worker_files(files[:8], 3) == [ ["f000", "f001"], ["f002", "f003", "f006"], ["f004", "f005", "f007"], ] - assert Normalize.group_worker_files(files[:5], 3) == [ + assert group_worker_files(files[:5], 3) == [ ["f000"], ["f001", "f003"], ["f002", "f004"], @@ -539,7 +540,7 @@ def test_group_worker_files() -> None: # check if sorted files = ["tab1.1", "chd.3", "tab1.2", "chd.4", "tab1.3"] - assert Normalize.group_worker_files(files, 3) == [ + assert group_worker_files(files, 3) == [ ["chd.3"], ["chd.4", "tab1.2"], ["tab1.1", "tab1.3"], From 116add0c3cec596646f5e9d1dd2f556bd8751ddc Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 19 Jun 2024 12:55:21 +0200 Subject: [PATCH 077/105] supports csv format config for snowflake --- .../impl/snowflake/configuration.py | 4 +++ dlt/destinations/impl/snowflake/snowflake.py | 32 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/impl/snowflake/configuration.py b/dlt/destinations/impl/snowflake/configuration.py index c8cc805712..eed4e23b87 100644 --- a/dlt/destinations/impl/snowflake/configuration.py +++ b/dlt/destinations/impl/snowflake/configuration.py @@ -3,6 +3,7 @@ from typing import Final, Optional, Any, Dict, ClassVar, List, TYPE_CHECKING, Union from dlt import version +from dlt.common.data_writers.writers import CsvDataWriterConfiguration from dlt.common.libs.sql_alchemy import URL from dlt.common.exceptions import MissingDependencyException from dlt.common.typing import TSecretStrValue @@ -138,6 +139,9 @@ class SnowflakeClientConfiguration(DestinationClientDwhWithStagingConfiguration) keep_staged_files: bool = True """Whether to keep or delete the staged files after COPY INTO succeeds""" + csv_format: Optional[CsvDataWriterConfiguration] = None + """Optional csv format configuration""" + def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" if self.credentials and self.credentials.host: diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index 20fa1a2886..83ae23f752 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -1,6 +1,7 @@ from typing import ClassVar, Optional, Sequence, Tuple, List, Any from urllib.parse import urlparse, urlunparse +from dlt.common.data_writers.writers import 
CsvDataWriterConfiguration from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( FollowupJob, @@ -85,6 +86,7 @@ def __init__( table_name: str, load_id: str, client: SnowflakeSqlClient, + config: SnowflakeClientConfiguration, stage_name: Optional[str] = None, keep_staged_files: bool = True, staging_credentials: Optional[CredentialsConfiguration] = None, @@ -107,6 +109,14 @@ def __init__( credentials_clause = "" files_clause = "" stage_file_path = "" + on_error_clause = "" + + case_folding = ( + "CASE_SENSITIVE" + if client.capabilities.casefold_identifier is str + else "CASE_INSENSITIVE" + ) + column_match_clause = f"MATCH_BY_COLUMN_NAME='{case_folding}'" if bucket_path: bucket_url = urlparse(bucket_path) @@ -166,14 +176,24 @@ def __init__( if file_name.endswith("jsonl"): source_format = "( TYPE = 'JSON', BINARY_FORMAT = 'BASE64' )" if file_name.endswith("parquet"): - source_format = "(TYPE = 'PARQUET', BINARY_AS_TEXT = FALSE, USE_LOGICAL_TYPE = TRUE)" + source_format = ( + "(TYPE = 'PARQUET', BINARY_AS_TEXT = FALSE, USE_LOGICAL_TYPE = TRUE," + " USE_VECTORIZED_SCANNER = TRUE)" + ) if file_name.endswith("csv"): # empty strings are NULL, no data is NULL, missing columns (ERROR_ON_COLUMN_COUNT_MISMATCH) are NULL + csv_format = config.csv_format or CsvDataWriterConfiguration() source_format = ( - "(TYPE = 'CSV', BINARY_FORMAT = 'UTF-8', PARSE_HEADER = TRUE," - " FIELD_OPTIONALLY_ENCLOSED_BY = '\"', NULL_IF = ('')," - " ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE)" + "(TYPE = 'CSV', BINARY_FORMAT = 'UTF-8', PARSE_HEADER =" + f" {csv_format.include_header}, FIELD_OPTIONALLY_ENCLOSED_BY = '\"', NULL_IF =" + " (''), ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE," + f" FIELD_DELIMITER='{csv_format.delimiter}')" ) + # disable column match if headers are not provided + if not csv_format.include_header: + column_match_clause = "" + if csv_format.on_error_continue: + on_error_clause = "ON_ERROR = CONTINUE" with client.begin_transaction(): # PUT and COPY in one tx if local file, otherwise only copy @@ -187,7 +207,8 @@ def __init__( {files_clause} {credentials_clause} FILE_FORMAT = {source_format} - MATCH_BY_COLUMN_NAME='CASE_INSENSITIVE' + {column_match_clause} + {on_error_clause} """) if stage_file_path and not keep_staged_files: client.execute_sql(f"REMOVE {stage_file_path}") @@ -223,6 +244,7 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> table["name"], load_id, self.sql_client, + self.config, stage_name=self.config.stage_name, keep_staged_files=self.config.keep_staged_files, staging_credentials=( From 42eacaf20d83c5ee68aa072167b3dc2a5e407e71 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 20 Jun 2024 23:55:24 +0200 Subject: [PATCH 078/105] removes realpath wherever possible and adds fast make_full_path to FileStorage --- dlt/common/storages/file_storage.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dlt/common/storages/file_storage.py b/dlt/common/storages/file_storage.py index d768ec720a..7d14b8f7f7 100644 --- a/dlt/common/storages/file_storage.py +++ b/dlt/common/storages/file_storage.py @@ -6,7 +6,7 @@ import tempfile import shutil import pathvalidate -from typing import IO, Any, Optional, List, cast, overload +from typing import IO, Any, Optional, List, cast from dlt.common.typing import AnyFun from dlt.common.utils import encoding_for_mode, uniq_id @@ -18,7 +18,7 @@ class FileStorage: def __init__(self, storage_path: str, file_type: str = "t", makedirs: bool = 
False) -> None: # make it absolute path - self.storage_path = os.path.realpath(storage_path) # os.path.join(, '') + self.storage_path = os.path.realpath(storage_path) self.file_type = file_type if makedirs: os.makedirs(storage_path, exist_ok=True) @@ -243,7 +243,8 @@ def atomic_import( FileStorage.move_atomic_to_file(external_file_path, dest_file_path) ) - def in_storage(self, path: str) -> bool: + def is_path_in_storage(self, path: str) -> bool: + """Checks if a given path is below storage root, without checking for item existence""" assert path is not None # all paths are relative to root if not os.path.isabs(path): @@ -256,25 +257,30 @@ def in_storage(self, path: str) -> bool: def to_relative_path(self, path: str) -> str: if path == "": return "" - if not self.in_storage(path): + if not self.is_path_in_storage(path): raise ValueError(path) if not os.path.isabs(path): path = os.path.realpath(os.path.join(self.storage_path, path)) # for abs paths find the relative return os.path.relpath(path, start=self.storage_path) - def make_full_path(self, path: str) -> str: + def make_full_path_safe(self, path: str) -> str: + """Verifies that path is under storage root and then returns normalized absolute path""" # try to make a relative path if paths are absolute or overlapping path = self.to_relative_path(path) # then assume that it is a path relative to storage root return os.path.realpath(os.path.join(self.storage_path, path)) + def make_full_path(self, path: str) -> str: + """Joins path with storage root. Intended for path known to be relative to storage root""" + return os.path.join(self.storage_path, path) + def from_wd_to_relative_path(self, wd_relative_path: str) -> str: path = os.path.realpath(wd_relative_path) return self.to_relative_path(path) def from_relative_path_to_wd(self, relative_path: str) -> str: - return os.path.relpath(self.make_full_path(relative_path), start=".") + return os.path.relpath(self.make_full_path_safe(relative_path), start=".") @staticmethod def get_file_name_from_file_path(file_path: str) -> str: From 3793d069c8de20b2c0b18bfc9994098d53b23748 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 20 Jun 2024 23:55:58 +0200 Subject: [PATCH 079/105] adds additional methods to load_package storage to make listings faster --- dlt/common/destination/reference.py | 2 +- dlt/common/storages/exceptions.py | 17 ++ dlt/common/storages/load_package.py | 184 +++++++++++++----- dlt/destinations/impl/dummy/dummy.py | 2 +- .../impl/filesystem/filesystem.py | 12 +- dlt/destinations/job_client_impl.py | 6 +- dlt/extract/extractors.py | 2 +- dlt/load/configuration.py | 2 - dlt/load/load.py | 39 ++-- dlt/load/utils.py | 10 +- dlt/pipeline/dbt.py | 2 +- tests/common/storages/test_file_storage.py | 36 ++-- tests/common/storages/test_load_package.py | 178 ++++++++++++++++- tests/common/storages/test_load_storage.py | 12 +- tests/load/test_dummy_client.py | 55 ++---- tests/load/utils.py | 2 +- 16 files changed, 415 insertions(+), 146 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 374dac146b..4141057196 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -334,7 +334,7 @@ def should_truncate_table_before_load(self, table: TTableSchema) -> bool: def create_table_chain_completed_followup_jobs( self, table_chain: Sequence[TTableSchema], - table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, + completed_table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, ) -> List[NewLoadJob]: 
"""Creates a list of followup jobs that should be executed after a table chain is completed""" return [] diff --git a/dlt/common/storages/exceptions.py b/dlt/common/storages/exceptions.py index 26a76bb5c0..028491dd9c 100644 --- a/dlt/common/storages/exceptions.py +++ b/dlt/common/storages/exceptions.py @@ -79,6 +79,23 @@ def __init__(self, load_id: str) -> None: super().__init__(f"Package with load id {load_id} could not be found") +class LoadPackageAlreadyCompleted(LoadStorageException): + def __init__(self, load_id: str) -> None: + self.load_id = load_id + super().__init__( + f"Package with load id {load_id} is already completed, but another complete was" + " requested" + ) + + +class LoadPackageNotCompleted(LoadStorageException): + def __init__(self, load_id: str) -> None: + self.load_id = load_id + super().__init__( + f"Package with load id {load_id} is not yet completed, but method required that" + ) + + class SchemaStorageException(StorageException): pass diff --git a/dlt/common/storages/load_package.py b/dlt/common/storages/load_package.py index 4d72458e3e..f29a3f4446 100644 --- a/dlt/common/storages/load_package.py +++ b/dlt/common/storages/load_package.py @@ -5,7 +5,7 @@ import datetime # noqa: 251 import humanize -from pathlib import Path +from pathlib import PurePath from pendulum.datetime import DateTime from typing import ( ClassVar, @@ -37,7 +37,12 @@ from dlt.common.schema import Schema, TSchemaTables from dlt.common.schema.typing import TStoredSchema, TTableSchemaColumns, TTableSchema from dlt.common.storages import FileStorage -from dlt.common.storages.exceptions import LoadPackageNotFound, CurrentLoadPackageStateNotAvailable +from dlt.common.storages.exceptions import ( + LoadPackageAlreadyCompleted, + LoadPackageNotCompleted, + LoadPackageNotFound, + CurrentLoadPackageStateNotAvailable, +) from dlt.common.typing import DictStrAny, SupportsHumanize from dlt.common.utils import flatten_list_or_items from dlt.common.versioned_state import ( @@ -52,6 +57,7 @@ TJobFileFormat = Literal["sql", "reference", TLoaderFileFormat] """Loader file formats with internal job types""" +JOB_EXCEPTION_EXTENSION = ".exception" class TPipelineStateDoc(TypedDict, total=False): @@ -165,7 +171,7 @@ def with_retry(self) -> "ParsedLoadJobFileName": @staticmethod def parse(file_name: str) -> "ParsedLoadJobFileName": - p = Path(file_name) + p = PurePath(file_name) parts = p.name.split(".") if len(parts) != 4: raise TerminalValueError(parts) @@ -319,13 +325,16 @@ def __init__(self, storage: FileStorage, initial_state: TLoadPackageStatus) -> N # def get_package_path(self, load_id: str) -> str: + """Gets path of the package relative to storage root""" return load_id - def get_job_folder_path(self, load_id: str, folder: TJobState) -> str: - return os.path.join(self.get_package_path(load_id), folder) + def get_job_state_folder_path(self, load_id: str, state: TJobState) -> str: + """Gets path to the jobs in `state` in package `load_id`, relative to the storage root""" + return os.path.join(self.get_package_path(load_id), state) - def get_job_file_path(self, load_id: str, folder: TJobState, file_name: str) -> str: - return os.path.join(self.get_job_folder_path(load_id, folder), file_name) + def get_job_file_path(self, load_id: str, state: TJobState, file_name: str) -> str: + """Get path to job with `file_name` in `state` in package `load_id`, relative to the storage root""" + return os.path.join(self.get_job_state_folder_path(load_id, state), file_name) def list_packages(self) -> Sequence[str]: """Lists 
all load ids in storage, earliest first @@ -338,29 +347,42 @@ def list_packages(self) -> Sequence[str]: def list_new_jobs(self, load_id: str) -> Sequence[str]: new_jobs = self.storage.list_folder_files( - self.get_job_folder_path(load_id, PackageStorage.NEW_JOBS_FOLDER) + self.get_job_state_folder_path(load_id, PackageStorage.NEW_JOBS_FOLDER) ) return new_jobs def list_started_jobs(self, load_id: str) -> Sequence[str]: return self.storage.list_folder_files( - self.get_job_folder_path(load_id, PackageStorage.STARTED_JOBS_FOLDER) + self.get_job_state_folder_path(load_id, PackageStorage.STARTED_JOBS_FOLDER) ) def list_failed_jobs(self, load_id: str) -> Sequence[str]: - return self.storage.list_folder_files( - self.get_job_folder_path(load_id, PackageStorage.FAILED_JOBS_FOLDER) - ) - - def list_jobs_for_table(self, load_id: str, table_name: str) -> Sequence[LoadJobInfo]: - return self.filter_jobs_for_table(self.list_all_jobs(load_id), table_name) - - def list_all_jobs(self, load_id: str) -> Sequence[LoadJobInfo]: - info = self.get_load_package_info(load_id) - return [job for job in flatten_list_or_items(iter(info.jobs.values()))] # type: ignore + return [ + file + for file in self.storage.list_folder_files( + self.get_job_state_folder_path(load_id, PackageStorage.FAILED_JOBS_FOLDER) + ) + if not file.endswith(JOB_EXCEPTION_EXTENSION) + ] + + def list_job_with_states_for_table( + self, load_id: str, table_name: str + ) -> Sequence[Tuple[TJobState, ParsedLoadJobFileName]]: + return self.filter_jobs_for_table(self.list_all_jobs_with_states(load_id), table_name) + + def list_all_jobs_with_states( + self, load_id: str + ) -> Sequence[Tuple[TJobState, ParsedLoadJobFileName]]: + info = self.get_load_package_jobs(load_id) + state_jobs = [] + for state, jobs in info.items(): + state_jobs.extend([(state, job) for job in jobs]) + return state_jobs def list_failed_jobs_infos(self, load_id: str) -> Sequence[LoadJobInfo]: """List all failed jobs and associated error messages for a load package with `load_id`""" + if not self.is_package_completed(load_id): + raise LoadPackageNotCompleted(load_id) failed_jobs: List[LoadJobInfo] = [] package_path = self.get_package_path(load_id) package_created_at = pendulum.from_timestamp( @@ -371,12 +393,19 @@ def list_failed_jobs_infos(self, load_id: str) -> Sequence[LoadJobInfo]: ) ) for file in self.list_failed_jobs(load_id): - if not file.endswith(".exception"): - failed_jobs.append( - self._read_job_file_info("failed_jobs", file, package_created_at) + failed_jobs.append( + self._read_job_file_info( + load_id, "failed_jobs", ParsedLoadJobFileName.parse(file), package_created_at ) + ) return failed_jobs + def is_package_completed(self, load_id: str) -> bool: + package_path = self.get_package_path(load_id) + return self.storage.has_file( + os.path.join(package_path, PackageStorage.PACKAGE_COMPLETED_FILE_NAME) + ) + # # Move jobs # @@ -385,7 +414,9 @@ def import_job( self, load_id: str, job_file_path: str, job_state: TJobState = "new_jobs" ) -> None: """Adds new job by moving the `job_file_path` into `new_jobs` of package `load_id`""" - self.storage.atomic_import(job_file_path, self.get_job_folder_path(load_id, job_state)) + self.storage.atomic_import( + job_file_path, self.get_job_state_folder_path(load_id, job_state) + ) def start_job(self, load_id: str, file_name: str) -> str: return self._move_job( @@ -397,7 +428,7 @@ def fail_job(self, load_id: str, file_name: str, failed_message: Optional[str]) if failed_message: self.storage.save( self.get_job_file_path( - load_id, 
PackageStorage.FAILED_JOBS_FOLDER, file_name + ".exception" + load_id, PackageStorage.FAILED_JOBS_FOLDER, file_name + JOB_EXCEPTION_EXTENSION ), failed_message, ) @@ -455,6 +486,8 @@ def create_package(self, load_id: str, initial_state: TLoadPackageState = None) def complete_loading_package(self, load_id: str, load_state: TLoadPackageStatus) -> str: """Completes loading the package by writing marker file with`package_state. Returns path to the completed package""" load_path = self.get_package_path(load_id) + if self.is_package_completed(load_id): + raise LoadPackageAlreadyCompleted(load_id) # save marker file self.storage.save( os.path.join(load_path, PackageStorage.PACKAGE_COMPLETED_FILE_NAME), load_state @@ -468,7 +501,7 @@ def remove_completed_jobs(self, load_id: str) -> None: # delete completed jobs if not has_failed_jobs: self.storage.delete_folder( - self.get_job_folder_path(load_id, PackageStorage.COMPLETED_JOBS_FOLDER), + self.get_job_state_folder_path(load_id, PackageStorage.COMPLETED_JOBS_FOLDER), recursively=True, ) @@ -533,11 +566,32 @@ def get_load_package_state_path(self, load_id: str) -> str: # Get package info # - def get_load_package_info(self, load_id: str) -> LoadPackageInfo: - """Gets information on normalized/completed package with given load_id, all jobs and their statuses.""" + def get_load_package_jobs(self, load_id: str) -> Dict[TJobState, List[ParsedLoadJobFileName]]: + """Gets all jobs in a package and returns them as lists assigned to a particular state.""" package_path = self.get_package_path(load_id) if not self.storage.has_folder(package_path): raise LoadPackageNotFound(load_id) + all_jobs: Dict[TJobState, List[ParsedLoadJobFileName]] = {} + for state in WORKING_FOLDERS: + jobs: List[ParsedLoadJobFileName] = [] + with contextlib.suppress(FileNotFoundError): + # we ignore if load package lacks one of working folders. completed_jobs may be deleted on archiving + for file in self.storage.list_folder_files( + self.get_job_state_folder_path(load_id, state), to_root=False + ): + if not file.endswith(JOB_EXCEPTION_EXTENSION): + jobs.append(ParsedLoadJobFileName.parse(file)) + all_jobs[state] = jobs + return all_jobs + + def get_load_package_info(self, load_id: str) -> LoadPackageInfo: + """Gets information on normalized/completed package with given load_id, all jobs and their statuses. + + Will reach to the file system to get additional stats, mtime, also collects exceptions for failed jobs. + NOTE: do not call this function often. it should be used only to generate metrics + """ + package_path = self.get_package_path(load_id) + package_jobs = self.get_load_package_jobs(load_id) package_created_at: DateTime = None package_state = self.initial_state @@ -560,15 +614,11 @@ def get_load_package_info(self, load_id: str) -> LoadPackageInfo: schema = Schema.from_dict(self._load_schema(load_id)) # read jobs with all statuses - all_jobs: Dict[TJobState, List[LoadJobInfo]] = {} - for state in WORKING_FOLDERS: - jobs: List[LoadJobInfo] = [] - with contextlib.suppress(FileNotFoundError): - # we ignore if load package lacks one of working folders. 
completed_jobs may be deleted on archiving - for file in self.storage.list_folder_files(os.path.join(package_path, state)): - if not file.endswith(".exception"): - jobs.append(self._read_job_file_info(state, file, package_created_at)) - all_jobs[state] = jobs + all_job_infos: Dict[TJobState, List[LoadJobInfo]] = {} + for state, jobs in package_jobs.items(): + all_job_infos[state] = [ + self._read_job_file_info(load_id, state, job, package_created_at) for job in jobs + ] return LoadPackageInfo( load_id, @@ -577,15 +627,46 @@ def get_load_package_info(self, load_id: str) -> LoadPackageInfo: schema, applied_update, package_created_at, - all_jobs, + all_job_infos, ) - def _read_job_file_info(self, state: TJobState, file: str, now: DateTime = None) -> LoadJobInfo: - try: - failed_message = self.storage.load(file + ".exception") - except FileNotFoundError: - failed_message = None - full_path = self.storage.make_full_path(file) + def get_job_failed_message(self, load_id: str, job: ParsedLoadJobFileName) -> str: + """Get exception message of a failed job.""" + rel_path = self.get_job_file_path(load_id, "failed_jobs", job.file_name()) + if not self.storage.has_file(rel_path): + raise FileNotFoundError(rel_path) + failed_message: str = None + with contextlib.suppress(FileNotFoundError): + failed_message = self.storage.load(rel_path + JOB_EXCEPTION_EXTENSION) + return failed_message + + def job_to_job_info( + self, load_id: str, state: TJobState, job: ParsedLoadJobFileName + ) -> LoadJobInfo: + """Creates partial job info by converting job object. size, mtime and failed message will not be populated""" + full_path = os.path.join( + self.storage.storage_path, self.get_job_file_path(load_id, state, job.file_name()) + ) + return LoadJobInfo( + state, + full_path, + 0, + None, + 0, + job, + None, + ) + + def _read_job_file_info( + self, load_id: str, state: TJobState, job: ParsedLoadJobFileName, now: DateTime = None + ) -> LoadJobInfo: + """Creates job info by reading additional props from storage""" + failed_message = None + if state == "failed_jobs": + failed_message = self.get_job_failed_message(load_id, job) + full_path = os.path.join( + self.storage.storage_path, self.get_job_file_path(load_id, state, job.file_name()) + ) st = os.stat(full_path) return LoadJobInfo( state, @@ -593,7 +674,7 @@ def _read_job_file_info(self, state: TJobState, file: str, now: DateTime = None) st.st_size, pendulum.from_timestamp(st.st_mtime), PackageStorage._job_elapsed_time_seconds(full_path, now.timestamp() if now else None), - ParsedLoadJobFileName.parse(file), + job, failed_message, ) @@ -611,10 +692,11 @@ def _move_job( ) -> str: # ensure we move file names, not paths assert file_name == FileStorage.get_file_name_from_file_path(file_name) - load_path = self.get_package_path(load_id) - dest_path = os.path.join(load_path, dest_folder, new_file_name or file_name) - self.storage.atomic_rename(os.path.join(load_path, source_folder, file_name), dest_path) - # print(f"{join(load_path, source_folder, file_name)} -> {dest_path}") + + dest_path = self.get_job_file_path(load_id, dest_folder, new_file_name or file_name) + self.storage.atomic_rename( + self.get_job_file_path(load_id, source_folder, file_name), dest_path + ) return self.storage.make_full_path(dest_path) def _load_schema(self, load_id: str) -> DictStrAny: @@ -659,9 +741,9 @@ def _job_elapsed_time_seconds(file_path: str, now_ts: float = None) -> float: @staticmethod def filter_jobs_for_table( - all_jobs: Iterable[LoadJobInfo], table_name: str - ) -> 
Sequence[LoadJobInfo]: - return [job for job in all_jobs if job.job_file_info.table_name == table_name] + all_jobs: Iterable[Tuple[TJobState, ParsedLoadJobFileName]], table_name: str + ) -> Sequence[Tuple[TJobState, ParsedLoadJobFileName]]: + return [job for job in all_jobs if job[1].table_name == table_name] @configspec diff --git a/dlt/destinations/impl/dummy/dummy.py b/dlt/destinations/impl/dummy/dummy.py index 965a558a5d..c41b7dca61 100644 --- a/dlt/destinations/impl/dummy/dummy.py +++ b/dlt/destinations/impl/dummy/dummy.py @@ -162,7 +162,7 @@ def restore_file_load(self, file_path: str) -> LoadJob: def create_table_chain_completed_followup_jobs( self, table_chain: Sequence[TTableSchema], - table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, + completed_table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, ) -> List[NewLoadJob]: """Creates a list of followup jobs that should be executed after a table chain is completed""" return [] diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 4cffebd7ce..ef438de9a0 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -531,19 +531,23 @@ def get_stored_schema_by_hash(self, version_hash: str) -> Optional[StorageSchema def create_table_chain_completed_followup_jobs( self, table_chain: Sequence[TTableSchema], - table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, + completed_table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, ) -> List[NewLoadJob]: def get_table_jobs( table_jobs: Sequence[LoadJobInfo], table_name: str ) -> Sequence[LoadJobInfo]: return [job for job in table_jobs if job.job_file_info.table_name == table_name] - assert table_chain_jobs is not None - jobs = super().create_table_chain_completed_followup_jobs(table_chain, table_chain_jobs) + assert completed_table_chain_jobs is not None + jobs = super().create_table_chain_completed_followup_jobs( + table_chain, completed_table_chain_jobs + ) table_format = table_chain[0].get("table_format") if table_format == "delta": delta_jobs = [ - DeltaLoadFilesystemJob(self, table, get_table_jobs(table_chain_jobs, table["name"])) + DeltaLoadFilesystemJob( + self, table, get_table_jobs(completed_table_chain_jobs, table["name"]) + ) for table in table_chain ] jobs.extend(delta_jobs) diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 3d384b2c28..14dfd8b894 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -241,10 +241,12 @@ def _create_replace_followup_jobs( def create_table_chain_completed_followup_jobs( self, table_chain: Sequence[TTableSchema], - table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, + completed_table_chain_jobs: Optional[Sequence[LoadJobInfo]] = None, ) -> List[NewLoadJob]: """Creates a list of followup jobs for merge write disposition and staging replace strategies""" - jobs = super().create_table_chain_completed_followup_jobs(table_chain, table_chain_jobs) + jobs = super().create_table_chain_completed_followup_jobs( + table_chain, completed_table_chain_jobs + ) write_disposition = table_chain[0]["write_disposition"] if write_disposition == "append": jobs.extend(self._create_append_followup_jobs(table_chain)) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 4f88f8c645..3dd53f5ce4 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -179,7 +179,7 @@ def _write_to_dynamic_table(self, resource: DltResource, 
items: TDataItems, meta if isinstance(meta, ImportFileMeta): self._import_item(table_name, resource.name, meta) else: - self._write_item(table_name, resource.name, items) + self._write_item(table_name, resource.name, item) def _write_to_static_table( self, resource: DltResource, table_name: str, items: TDataItems, meta: Any diff --git a/dlt/load/configuration.py b/dlt/load/configuration.py index b3fc2fbcd4..34c72228e4 100644 --- a/dlt/load/configuration.py +++ b/dlt/load/configuration.py @@ -1,5 +1,3 @@ -from typing import TYPE_CHECKING - from dlt.common.configuration import configspec from dlt.common.storages import LoadStorageConfiguration from dlt.common.runners.configuration import PoolRunnerConfiguration, TPoolType diff --git a/dlt/load/load.py b/dlt/load/load.py index cf5618c024..b0ae6f60f6 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -251,13 +251,20 @@ def create_followup_jobs( schema.tables, starting_job.job_file_info().table_name ) # if all tables of chain completed, create follow up jobs - all_jobs = self.load_storage.normalized_packages.list_all_jobs(load_id) + all_jobs_states = self.load_storage.normalized_packages.list_all_jobs_with_states( + load_id + ) if table_chain := get_completed_table_chain( - schema, all_jobs, top_job_table, starting_job.job_file_info().job_id() + schema, all_jobs_states, top_job_table, starting_job.job_file_info().job_id() ): table_chain_names = [table["name"] for table in table_chain] + # create job infos that contain full path to job table_chain_jobs = [ - job for job in all_jobs if job.job_file_info.table_name in table_chain_names + self.load_storage.normalized_packages.job_to_job_info(load_id, *job_state) + for job_state in all_jobs_states + if job_state[1].table_name in table_chain_names + # job being completed is still in started_jobs + and job_state[0] in ("completed_jobs", "started_jobs") ] if follow_up_jobs := client.create_table_chain_completed_followup_jobs( table_chain, table_chain_jobs @@ -424,10 +431,10 @@ def load_single_package(self, load_id: str, schema: Schema) -> None: self.complete_package(load_id, schema, False) return # update counter we only care about the jobs that are scheduled to be loaded - package_info = self.load_storage.normalized_packages.get_load_package_info(load_id) - total_jobs = reduce(lambda p, c: p + len(c), package_info.jobs.values(), 0) - no_failed_jobs = len(package_info.jobs["failed_jobs"]) - no_completed_jobs = len(package_info.jobs["completed_jobs"]) + no_failed_jobs + package_jobs = self.load_storage.normalized_packages.get_load_package_jobs(load_id) + total_jobs = reduce(lambda p, c: p + len(c), package_jobs.values(), 0) + no_failed_jobs = len(package_jobs["failed_jobs"]) + no_completed_jobs = len(package_jobs["completed_jobs"]) + no_failed_jobs self.collector.update("Jobs", no_completed_jobs, total_jobs) if no_failed_jobs > 0: self.collector.update( @@ -439,26 +446,28 @@ def load_single_package(self, load_id: str, schema: Schema) -> None: remaining_jobs = self.complete_jobs(load_id, jobs, schema) if len(remaining_jobs) == 0: # get package status - package_info = self.load_storage.normalized_packages.get_load_package_info( + package_jobs = self.load_storage.normalized_packages.get_load_package_jobs( load_id ) # possibly raise on failed jobs if self.config.raise_on_failed_jobs: - if package_info.jobs["failed_jobs"]: - failed_job = package_info.jobs["failed_jobs"][0] + if package_jobs["failed_jobs"]: + failed_job = package_jobs["failed_jobs"][0] raise LoadClientJobFailed( load_id, - 
failed_job.job_file_info.job_id(), - failed_job.failed_message, + failed_job.job_id(), + self.load_storage.normalized_packages.get_job_failed_message( + load_id, failed_job + ), ) # possibly raise on too many retries if self.config.raise_on_max_retries: - for new_job in package_info.jobs["new_jobs"]: - r_c = new_job.job_file_info.retry_count + for new_job in package_jobs["new_jobs"]: + r_c = new_job.retry_count if r_c > 0 and r_c % self.config.raise_on_max_retries == 0: raise LoadClientJobRetry( load_id, - new_job.job_file_info.job_id(), + new_job.job_id(), r_c, self.config.raise_on_max_retries, ) diff --git a/dlt/load/utils.py b/dlt/load/utils.py index 5126cbd11e..6a2df1bca9 100644 --- a/dlt/load/utils.py +++ b/dlt/load/utils.py @@ -1,7 +1,7 @@ -from typing import List, Set, Iterable, Callable, Optional +from typing import List, Set, Iterable, Callable, Optional, Tuple from dlt.common import logger -from dlt.common.storages.load_package import LoadJobInfo, PackageStorage +from dlt.common.storages.load_package import LoadJobInfo, PackageStorage, TJobState from dlt.common.schema.utils import ( fill_hints_from_parent_and_clone_table, get_child_tables, @@ -19,7 +19,7 @@ def get_completed_table_chain( schema: Schema, - all_jobs: Iterable[LoadJobInfo], + all_jobs: Iterable[Tuple[TJobState, ParsedLoadJobFileName]], top_merged_table: TTableSchema, being_completed_job_id: str = None, ) -> List[TTableSchema]: @@ -51,8 +51,8 @@ def get_completed_table_chain( else: # all jobs must be completed in order for merge to be created if any( - job.state not in ("failed_jobs", "completed_jobs") - and job.job_file_info.job_id() != being_completed_job_id + job[0] not in ("failed_jobs", "completed_jobs") + and job[1].job_id() != being_completed_job_id for job in table_jobs ): return None diff --git a/dlt/pipeline/dbt.py b/dlt/pipeline/dbt.py index ee900005fd..0b6ec5f896 100644 --- a/dlt/pipeline/dbt.py +++ b/dlt/pipeline/dbt.py @@ -38,7 +38,7 @@ def get_venv( # keep venv inside pipeline if path is relative if not os.path.isabs(venv_path): pipeline._pipeline_storage.create_folder(venv_path, exists_ok=True) - venv_dir = pipeline._pipeline_storage.make_full_path(venv_path) + venv_dir = pipeline._pipeline_storage.make_full_path_safe(venv_path) else: venv_dir = venv_path # try to restore existing venv diff --git a/tests/common/storages/test_file_storage.py b/tests/common/storages/test_file_storage.py index eae765398b..7a10e29097 100644 --- a/tests/common/storages/test_file_storage.py +++ b/tests/common/storages/test_file_storage.py @@ -39,38 +39,40 @@ def test_to_relative_path(test_storage: FileStorage) -> None: def test_make_full_path(test_storage: FileStorage) -> None: # fully within storage relative_path = os.path.join("dir", "to", "file") - path = test_storage.make_full_path(relative_path) + path = test_storage.make_full_path_safe(relative_path) assert path.endswith(os.path.join(TEST_STORAGE_ROOT, relative_path)) # overlapped with storage root_path = os.path.join(TEST_STORAGE_ROOT, relative_path) - path = test_storage.make_full_path(root_path) + path = test_storage.make_full_path_safe(root_path) assert path.endswith(root_path) assert path.count(TEST_STORAGE_ROOT) == 2 # absolute path with different root than TEST_STORAGE_ROOT does not lead into storage so calculating full path impossible with pytest.raises(ValueError): - test_storage.make_full_path(os.path.join("/", root_path)) + test_storage.make_full_path_safe(os.path.join("/", root_path)) # relative path out of the root with pytest.raises(ValueError): - 
test_storage.make_full_path("..") + test_storage.make_full_path_safe("..") # absolute overlapping path - path = test_storage.make_full_path(os.path.abspath(root_path)) + path = test_storage.make_full_path_safe(os.path.abspath(root_path)) assert path.endswith(root_path) - assert test_storage.make_full_path("") == test_storage.storage_path - assert test_storage.make_full_path(".") == test_storage.storage_path + assert test_storage.make_full_path_safe("") == test_storage.storage_path + assert test_storage.make_full_path_safe(".") == test_storage.storage_path def test_in_storage(test_storage: FileStorage) -> None: # always relative to storage root - assert test_storage.in_storage("a/b/c") is True - assert test_storage.in_storage(f"../{TEST_STORAGE_ROOT}/b/c") is True - assert test_storage.in_storage("../a/b/c") is False - assert test_storage.in_storage("../../../a/b/c") is False - assert test_storage.in_storage("/a") is False - assert test_storage.in_storage(".") is True - assert test_storage.in_storage(os.curdir) is True - assert test_storage.in_storage(os.path.realpath(os.curdir)) is False + assert test_storage.is_path_in_storage("a/b/c") is True + assert test_storage.is_path_in_storage(f"../{TEST_STORAGE_ROOT}/b/c") is True + assert test_storage.is_path_in_storage("../a/b/c") is False + assert test_storage.is_path_in_storage("../../../a/b/c") is False + assert test_storage.is_path_in_storage("/a") is False + assert test_storage.is_path_in_storage(".") is True + assert test_storage.is_path_in_storage(os.curdir) is True + assert test_storage.is_path_in_storage(os.path.realpath(os.curdir)) is False assert ( - test_storage.in_storage(os.path.join(os.path.realpath(os.curdir), TEST_STORAGE_ROOT)) + test_storage.is_path_in_storage( + os.path.join(os.path.realpath(os.curdir), TEST_STORAGE_ROOT) + ) is True ) @@ -164,7 +166,7 @@ def test_rmtree_ro(test_storage: FileStorage) -> None: test_storage.create_folder("protected") path = test_storage.save("protected/barbapapa.txt", "barbapapa") os.chmod(path, stat.S_IREAD) - os.chmod(test_storage.make_full_path("protected"), stat.S_IREAD) + os.chmod(test_storage.make_full_path_safe("protected"), stat.S_IREAD) with pytest.raises(PermissionError): test_storage.delete_folder("protected", recursively=True, delete_ro=False) test_storage.delete_folder("protected", recursively=True, delete_ro=True) diff --git a/tests/common/storages/test_load_package.py b/tests/common/storages/test_load_package.py index ecbc5d296d..45bc8d157e 100644 --- a/tests/common/storages/test_load_package.py +++ b/tests/common/storages/test_load_package.py @@ -8,10 +8,8 @@ from dlt.common import sleep from dlt.common.schema import Schema from dlt.common.storages import PackageStorage, LoadStorage, ParsedLoadJobFileName +from dlt.common.storages.exceptions import LoadPackageAlreadyCompleted, LoadPackageNotCompleted from dlt.common.utils import uniq_id - -from tests.common.storages.utils import start_loading_file, assert_package_info, load_storage -from tests.utils import autouse_test_storage from dlt.common.pendulum import pendulum from dlt.common.configuration.container import Container from dlt.common.storages.load_package import ( @@ -23,6 +21,9 @@ clear_destination_state, ) +from tests.common.storages.utils import start_loading_file, assert_package_info, load_storage +from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage + def test_is_partially_loaded(load_storage: LoadStorage) -> None: load_id, file_name = start_loading_file( @@ -243,6 +244,177 @@ def 
test_build_parse_job_path(load_storage: LoadStorage) -> None: ParsedLoadJobFileName.parse("tab.id.wrong_retry.jsonl") +def test_load_package_listings(load_storage: LoadStorage) -> None: + # 100 csv files + load_id = create_load_package(load_storage.new_packages, 100) + new_jobs = load_storage.new_packages.list_new_jobs(load_id) + assert len(new_jobs) == 100 + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_1")) == 100 + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_2")) == 0 + assert len(load_storage.new_packages.list_all_jobs_with_states(load_id)) == 100 + assert len(load_storage.new_packages.list_started_jobs(load_id)) == 0 + assert len(load_storage.new_packages.list_failed_jobs(load_id)) == 0 + assert load_storage.new_packages.is_package_completed(load_id) is False + with pytest.raises(LoadPackageNotCompleted): + load_storage.new_packages.list_failed_jobs_infos(load_id) + # add a few more files + add_new_jobs(load_storage.new_packages, load_id, 7, "items_2") + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_1")) == 100 + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_2")) == 7 + j_w_s = load_storage.new_packages.list_all_jobs_with_states(load_id) + assert len(j_w_s) == 107 + assert all(job[0] == "new_jobs" for job in j_w_s) + with pytest.raises(FileNotFoundError): + load_storage.new_packages.get_job_failed_message(load_id, j_w_s[0][1]) + # get package infos + package_jobs = load_storage.new_packages.get_load_package_jobs(load_id) + assert len(package_jobs["new_jobs"]) == 107 + # other folders empty + assert len(package_jobs["started_jobs"]) == 0 + package_info = load_storage.new_packages.get_load_package_info(load_id) + assert len(package_info.jobs["new_jobs"]) == 107 + assert len(package_info.jobs["completed_jobs"]) == 0 + assert package_info.load_id == load_id + # full path + assert package_info.package_path == load_storage.new_packages.storage.make_full_path(load_id) + assert package_info.state == "new" + assert package_info.completed_at is None + + # move some files + new_jobs = sorted(load_storage.new_packages.list_new_jobs(load_id)) + load_storage.new_packages.start_job(load_id, os.path.basename(new_jobs[0])) + load_storage.new_packages.start_job(load_id, os.path.basename(new_jobs[1])) + load_storage.new_packages.start_job(load_id, os.path.basename(new_jobs[-1])) + load_storage.new_packages.start_job(load_id, os.path.basename(new_jobs[-2])) + + assert len(load_storage.new_packages.list_started_jobs(load_id)) == 4 + assert len(load_storage.new_packages.list_new_jobs(load_id)) == 103 + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_1")) == 100 + assert len(load_storage.new_packages.list_job_with_states_for_table(load_id, "items_2")) == 7 + package_jobs = load_storage.new_packages.get_load_package_jobs(load_id) + assert len(package_jobs["new_jobs"]) == 103 + assert len(package_jobs["started_jobs"]) == 4 + package_info = load_storage.new_packages.get_load_package_info(load_id) + assert len(package_info.jobs["new_jobs"]) == 103 + assert len(package_info.jobs["started_jobs"]) == 4 + + # complete and fail some + load_storage.new_packages.complete_job(load_id, os.path.basename(new_jobs[0])) + load_storage.new_packages.fail_job(load_id, os.path.basename(new_jobs[1]), None) + load_storage.new_packages.fail_job(load_id, os.path.basename(new_jobs[-1]), "error!") + path = 
load_storage.new_packages.retry_job(load_id, os.path.basename(new_jobs[-2])) + assert ParsedLoadJobFileName.parse(path).retry_count == 1 + assert ( + load_storage.new_packages.get_job_failed_message( + load_id, ParsedLoadJobFileName.parse(new_jobs[1]) + ) + is None + ) + assert ( + load_storage.new_packages.get_job_failed_message( + load_id, ParsedLoadJobFileName.parse(new_jobs[-1]) + ) + == "error!" + ) + # can't move again + with pytest.raises(FileNotFoundError): + load_storage.new_packages.complete_job(load_id, os.path.basename(new_jobs[0])) + assert len(load_storage.new_packages.list_started_jobs(load_id)) == 0 + # retry back in new + assert len(load_storage.new_packages.list_new_jobs(load_id)) == 104 + package_jobs = load_storage.new_packages.get_load_package_jobs(load_id) + assert len(package_jobs["new_jobs"]) == 104 + assert len(package_jobs["started_jobs"]) == 0 + assert len(package_jobs["completed_jobs"]) == 1 + assert len(package_jobs["failed_jobs"]) == 2 + assert len(load_storage.new_packages.list_failed_jobs(load_id)) == 2 + package_info = load_storage.new_packages.get_load_package_info(load_id) + assert len(package_info.jobs["new_jobs"]) == 104 + assert len(package_info.jobs["started_jobs"]) == 0 + assert len(package_info.jobs["completed_jobs"]) == 1 + assert len(package_info.jobs["failed_jobs"]) == 2 + + # complete package + load_storage.new_packages.complete_loading_package(load_id, "aborted") + assert load_storage.new_packages.is_package_completed(load_id) + with pytest.raises(LoadPackageAlreadyCompleted): + load_storage.new_packages.complete_loading_package(load_id, "aborted") + + for job in package_info.jobs["failed_jobs"] + load_storage.new_packages.list_failed_jobs_infos( # type: ignore[operator] + load_id + ): + if job.job_file_info.table_name == "items_1": + assert job.failed_message is None + elif job.job_file_info.table_name == "items_2": + assert job.failed_message == "error!" + else: + raise AssertionError() + assert job.created_at is not None + assert job.elapsed is not None + assert job.file_size > 0 + assert job.state == "failed_jobs" + # must be abs path! 
+ assert os.path.isabs(job.file_path) + + +def test_get_load_package_info_perf(load_storage: LoadStorage) -> None: + import time + + st_t = time.time() + for _ in range(10000): + load_storage.loaded_packages.storage.make_full_path("198291092.121/new/ABD.CX.gx") + # os.path.basename("198291092.121/new/ABD.CX.gx") + print(time.time() - st_t) + + st_t = time.time() + load_id = create_load_package(load_storage.loaded_packages, 10000) + print(time.time() - st_t) + + st_t = time.time() + # move half of the files to failed + for file_name in load_storage.loaded_packages.list_new_jobs(load_id)[:1000]: + load_storage.loaded_packages.start_job(load_id, os.path.basename(file_name)) + load_storage.loaded_packages.fail_job( + load_id, os.path.basename(file_name), f"FAILED {file_name}" + ) + print(time.time() - st_t) + + st_t = time.time() + load_storage.loaded_packages.get_load_package_info(load_id) + print(time.time() - st_t) + + st_t = time.time() + table_stat = {} + for file in load_storage.loaded_packages.list_new_jobs(load_id): + parsed = ParsedLoadJobFileName.parse(file) + table_stat[parsed.table_name] = parsed + print(time.time() - st_t) + + +def create_load_package( + package_storage: PackageStorage, new_jobs: int, table_name="items_1" +) -> str: + schema = Schema("test") + load_id = create_load_id() + package_storage.create_package(load_id) + package_storage.save_schema(load_id, schema) + add_new_jobs(package_storage, load_id, new_jobs, table_name) + return load_id + + +def add_new_jobs( + package_storage: PackageStorage, load_id: str, new_jobs: int, table_name="items_1" +) -> None: + for _ in range(new_jobs): + file_name = PackageStorage.build_job_file_name( + table_name, ParsedLoadJobFileName.new_file_id(), 0, False, "csv" + ) + file_path = os.path.join(TEST_STORAGE_ROOT, file_name) + with open(file_path, "wt", encoding="utf-8") as f: + f.write("a|b|c") + package_storage.import_job(load_id, file_path) + + def test_migrate_to_load_package_state() -> None: """ Here we test that an existing load package without a state will not error diff --git a/tests/common/storages/test_load_storage.py b/tests/common/storages/test_load_storage.py index e8686ac2f9..49deaff23e 100644 --- a/tests/common/storages/test_load_storage.py +++ b/tests/common/storages/test_load_storage.py @@ -33,7 +33,7 @@ def test_complete_successful_package(load_storage: LoadStorage) -> None: # but completed packages are deleted load_storage.maybe_remove_completed_jobs(load_id) assert not load_storage.loaded_packages.storage.has_folder( - load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + load_storage.loaded_packages.get_job_state_folder_path(load_id, "completed_jobs") ) assert_package_info(load_storage, load_id, "loaded", "completed_jobs", jobs_count=0) # delete completed package @@ -56,7 +56,7 @@ def test_complete_successful_package(load_storage: LoadStorage) -> None: ) # has completed loads assert load_storage.loaded_packages.storage.has_folder( - load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + load_storage.loaded_packages.get_job_state_folder_path(load_id, "completed_jobs") ) load_storage.delete_loaded_package(load_id) assert not load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) @@ -82,14 +82,14 @@ def test_complete_package_failed_jobs(load_storage: LoadStorage) -> None: assert load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) # has completed loads assert load_storage.loaded_packages.storage.has_folder( - 
load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + load_storage.loaded_packages.get_job_state_folder_path(load_id, "completed_jobs") ) assert_package_info(load_storage, load_id, "loaded", "failed_jobs") # get failed jobs info failed_files = sorted(load_storage.loaded_packages.list_failed_jobs(load_id)) - # job + message - assert len(failed_files) == 2 + # only jobs + assert len(failed_files) == 1 assert load_storage.loaded_packages.storage.has_file(failed_files[0]) failed_info = load_storage.list_failed_jobs_in_loaded_package(load_id) assert failed_info[0].file_path == load_storage.loaded_packages.storage.make_full_path( @@ -117,7 +117,7 @@ def test_abort_package(load_storage: LoadStorage) -> None: assert_package_info(load_storage, load_id, "normalized", "failed_jobs") load_storage.complete_load_package(load_id, True) assert load_storage.loaded_packages.storage.has_folder( - load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + load_storage.loaded_packages.get_job_state_folder_path(load_id, "completed_jobs") ) assert_package_info(load_storage, load_id, "aborted", "failed_jobs") diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index b03c6c422b..beb2c88688 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -4,11 +4,11 @@ from unittest import mock import pytest from unittest.mock import patch -from typing import List +from typing import List, Tuple from dlt.common.exceptions import TerminalException, TerminalValueError from dlt.common.storages import FileStorage, PackageStorage, ParsedLoadJobFileName -from dlt.common.storages.load_package import LoadJobInfo +from dlt.common.storages.load_package import LoadJobInfo, TJobState from dlt.common.storages.load_storage import JobFileFormatUnsupported from dlt.common.destination.reference import LoadJob, TDestination from dlt.common.schema.utils import ( @@ -97,15 +97,11 @@ def test_unsupported_write_disposition() -> None: with ThreadPoolExecutor() as pool: load.run(pool) # job with unsupported write disp. 
is failed - exception_file = [ - f - for f in load.load_storage.normalized_packages.list_failed_jobs(load_id) - if f.endswith(".exception") - ][0] - assert ( - "LoadClientUnsupportedWriteDisposition" - in load.load_storage.normalized_packages.storage.load(exception_file) + failed_job = load.load_storage.normalized_packages.list_failed_jobs(load_id)[0] + failed_message = load.load_storage.normalized_packages.get_job_failed_message( + load_id, ParsedLoadJobFileName.parse(failed_job) ) + assert "LoadClientUnsupportedWriteDisposition" in failed_message def test_get_new_jobs_info() -> None: @@ -125,7 +121,7 @@ def test_get_completed_table_chain_single_job_per_table() -> None: schema.tables[table_name] = fill_hints_from_parent_and_clone_table(schema.tables, table) top_job_table = get_top_level_table(schema.tables, "event_user") - all_jobs = load.load_storage.normalized_packages.list_all_jobs(load_id) + all_jobs = load.load_storage.normalized_packages.list_all_jobs_with_states(load_id) assert get_completed_table_chain(schema, all_jobs, top_job_table) is None # fake being completed assert ( @@ -144,12 +140,12 @@ def test_get_completed_table_chain_single_job_per_table() -> None: load.load_storage.normalized_packages.start_job( load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" ) - all_jobs = load.load_storage.normalized_packages.list_all_jobs(load_id) + all_jobs = load.load_storage.normalized_packages.list_all_jobs_with_states(load_id) assert get_completed_table_chain(schema, all_jobs, loop_top_job_table) is None load.load_storage.normalized_packages.complete_job( load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" ) - all_jobs = load.load_storage.normalized_packages.list_all_jobs(load_id) + all_jobs = load.load_storage.normalized_packages.list_all_jobs_with_states(load_id) assert get_completed_table_chain(schema, all_jobs, loop_top_job_table) == [ schema.get_table("event_loop_interrupted") ] @@ -531,25 +527,15 @@ def test_get_completed_table_chain_cases() -> None: # child completed, parent not event_user = schema.get_table("event_user") event_user_entities = schema.get_table("event_user__parse_data__entities") - event_user_job = LoadJobInfo( + event_user_job: Tuple[TJobState, ParsedLoadJobFileName] = ( "started_jobs", - "path", - 0, - None, - 0, ParsedLoadJobFileName("event_user", "event_user_id", 0, "jsonl"), - None, ) - event_user_entities_job = LoadJobInfo( + event_user_entities_job: Tuple[TJobState, ParsedLoadJobFileName] = ( "completed_jobs", - "path", - 0, - None, - 0, ParsedLoadJobFileName( "event_user__parse_data__entities", "event_user__parse_data__entities_id", 0, "jsonl" ), - None, ) chain = get_completed_table_chain(schema, [event_user_job, event_user_entities_job], event_user) assert chain is None @@ -559,24 +545,21 @@ def test_get_completed_table_chain_cases() -> None: schema, [event_user_job, event_user_entities_job], event_user, - event_user_job.job_file_info.job_id(), + event_user_job[1].job_id(), ) # full chain assert chain == [event_user, event_user_entities] # parent failed, child completed chain = get_completed_table_chain( - schema, [event_user_job._replace(state="failed_jobs"), event_user_entities_job], event_user + schema, [("failed_jobs", event_user_job[1]), event_user_entities_job], event_user ) assert chain == [event_user, event_user_entities] # both failed chain = get_completed_table_chain( schema, - [ - event_user_job._replace(state="failed_jobs"), - event_user_entities_job._replace(state="failed_jobs"), - ], + 
[("failed_jobs", event_user_job[1]), ("failed_jobs", event_user_entities_job[1])], event_user, ) assert chain == [event_user, event_user_entities] @@ -587,16 +570,16 @@ def test_get_completed_table_chain_cases() -> None: event_user["write_disposition"] = w_d # type:ignore[typeddict-item] chain = get_completed_table_chain( - schema, [event_user_job], event_user, event_user_job.job_file_info.job_id() + schema, [event_user_job], event_user, event_user_job[1].job_id() ) assert chain == user_chain # but if child is present and incomplete... chain = get_completed_table_chain( schema, - [event_user_job, event_user_entities_job._replace(state="new_jobs")], + [event_user_job, ("new_jobs", event_user_entities_job[1])], event_user, - event_user_job.job_file_info.job_id(), + event_user_job[1].job_id(), ) # noting is returned assert chain is None @@ -607,7 +590,7 @@ def test_get_completed_table_chain_cases() -> None: ] del deep_child["x-normalizer"] chain = get_completed_table_chain( - schema, [event_user_job], event_user, event_user_job.job_file_info.job_id() + schema, [event_user_job], event_user, event_user_job[1].job_id() ) user_chain.remove(deep_child) assert chain == user_chain @@ -782,7 +765,7 @@ def assert_complete_job(load: Load, should_delete_completed: bool = False) -> No assert not load.load_storage.storage.has_folder( load.load_storage.get_normalized_package_path(load_id) ) - completed_path = load.load_storage.loaded_packages.get_job_folder_path( + completed_path = load.load_storage.loaded_packages.get_job_state_folder_path( load_id, "completed_jobs" ) if should_delete_completed: diff --git a/tests/load/utils.py b/tests/load/utils.py index eb40b8243f..d56f36b132 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -683,7 +683,7 @@ def prepare_load_package( shutil.copy( path, load_storage.new_packages.storage.make_full_path( - load_storage.new_packages.get_job_folder_path(load_id, "new_jobs") + load_storage.new_packages.get_job_state_folder_path(load_id, "new_jobs") ), ) schema_path = Path("./tests/load/cases/loading/schema.json") From 88eec9cab026e7d92143e020a406386c1a1923b0 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 22 Jun 2024 00:44:00 +0200 Subject: [PATCH 080/105] adds file_format to dlt.resource, uses preferred file format for dlt state table --- dlt/extract/__init__.py | 3 ++- dlt/extract/decorators.py | 51 +++++++++++++++++++++++++++++++++++++-- dlt/extract/extractors.py | 31 +++++++++++++++++++----- dlt/extract/hints.py | 12 +++++++++ 4 files changed, 88 insertions(+), 9 deletions(-) diff --git a/dlt/extract/__init__.py b/dlt/extract/__init__.py index 03b2e59539..4029241634 100644 --- a/dlt/extract/__init__.py +++ b/dlt/extract/__init__.py @@ -4,13 +4,14 @@ from dlt.extract.decorators import source, resource, transformer, defer from dlt.extract.incremental import Incremental from dlt.extract.wrappers import wrap_additional_type -from dlt.extract.extractors import materialize_schema_item +from dlt.extract.extractors import materialize_schema_item, with_file_import __all__ = [ "DltResource", "DltSource", "with_table_name", "with_hints", + "with_file_import", "make_hints", "source", "resource", diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index a44a70785d..4a2f2a2807 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -35,6 +35,7 @@ from dlt.common.schema.schema import Schema from dlt.common.schema.typing import ( TColumnNames, + TFileFormat, TWriteDisposition, TWriteDispositionConfig, TAnySchemaColumns, @@ -302,6 
+303,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -322,6 +324,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -342,6 +345,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -365,6 +369,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -384,6 +389,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -419,9 +425,10 @@ def resource( If not present, the name of the decorated function will be used. table_name (TTableHintTemplate[str], optional): An table name, if different from `name`. - max_table_nesting (int, optional): A schema hint that sets the maximum depth of nested table above which the remaining nodes are loaded as structs or JSON. This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + max_table_nesting (int, optional): A schema hint that sets the maximum depth of nested table above which the remaining nodes are loaded as structs or JSON. + write_disposition (TTableHintTemplate[TWriteDispositionConfig], optional): Controls how to write data to a table. Accepts a shorthand string literal or configuration dictionary. Allowed shorthand string literals: `append` will always add new data at the end of the table. `replace` will replace existing data with new data. `skip` will prevent data from loading. "merge" will deduplicate and merge data based on "primary_key" and "merge_key" hints. Defaults to "append". Write behaviour can be further customized through a configuration dictionary. For example, to obtain an SCD2 table provide `write_disposition={"disposition": "merge", "strategy": "scd2"}`. @@ -439,7 +446,12 @@ def resource( This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. schema_contract (TSchemaContract, optional): Schema contract settings that will be applied to all resources of this source (if not overridden in the resource itself) - table_format (Literal["iceberg"], optional): Defines the storage format of the table. Currently only "iceberg" is supported on Athena, other destinations ignore this hint. 
+ + table_format (Literal["iceberg", "delta"], optional): Defines the storage format of the table. Currently only "iceberg" is supported on Athena, and "delta" on the filesystem. + Other destinations ignore this hint. + + file_format (Literal["preferred", ...], optional): Format of the file in which resource data is stored. Useful when importing external files. Use `preferred` to force + a file format that is preferred by the destination used. This setting superseded the `load_file_format` passed to pipeline `run` method. selected (bool, optional): When `True` `dlt pipeline` will extract and load this resource, if `False`, the resource will be ignored. @@ -470,6 +482,7 @@ def make_resource(_name: str, _section: str, _data: Any) -> TDltResourceImpl: merge_key=merge_key, schema_contract=schema_contract, table_format=table_format, + file_format=file_format, ) resource = _impl_cls.from_data( @@ -580,10 +593,14 @@ def transformer( data_from: TUnboundDltResource = DltResource.Empty, name: str = None, table_name: TTableHintTemplate[str] = None, + max_table_nesting: int = None, write_disposition: TTableHintTemplate[TWriteDisposition] = None, columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -597,10 +614,14 @@ def transformer( data_from: TUnboundDltResource = DltResource.Empty, name: TTableHintTemplate[str] = None, table_name: TTableHintTemplate[str] = None, + max_table_nesting: int = None, write_disposition: TTableHintTemplate[TWriteDisposition] = None, columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -618,10 +639,14 @@ def transformer( data_from: TUnboundDltResource = DltResource.Empty, name: str = None, table_name: TTableHintTemplate[str] = None, + max_table_nesting: int = None, write_disposition: TTableHintTemplate[TWriteDisposition] = None, columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -635,10 +660,14 @@ def transformer( data_from: TUnboundDltResource = DltResource.Empty, name: TTableHintTemplate[str] = None, table_name: TTableHintTemplate[str] = None, + max_table_nesting: int = None, write_disposition: TTableHintTemplate[TWriteDisposition] = None, columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = 
None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -652,10 +681,14 @@ def transformer( data_from: TUnboundDltResource = DltResource.Empty, name: TTableHintTemplate[str] = None, table_name: TTableHintTemplate[str] = None, + max_table_nesting: int = None, write_disposition: TTableHintTemplate[TWriteDisposition] = None, columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, @@ -698,6 +731,8 @@ def transformer( table_name (TTableHintTemplate[str], optional): An table name, if different from `name`. This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + max_table_nesting (int, optional): A schema hint that sets the maximum depth of nested table above which the remaining nodes are loaded as structs or JSON. + write_disposition (Literal["skip", "append", "replace", "merge"], optional): Controls how to write data to a table. `append` will always add new data at the end of the table. `replace` will replace existing data with new data. `skip` will prevent data from loading. "merge" will deduplicate and merge data based on "primary_key" and "merge_key" hints. Defaults to "append". This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. @@ -710,6 +745,14 @@ def transformer( merge_key (str | Sequence[str]): A column name or a list of column names that define a merge key. Typically used with "merge" write disposition to remove overlapping data ranges ie. to keep a single record for a given day. This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + schema_contract (TSchemaContract, optional): Schema contract settings that will be applied to all resources of this source (if not overridden in the resource itself) + + table_format (Literal["iceberg", "delta"], optional): Defines the storage format of the table. Currently only "iceberg" is supported on Athena, and "delta" on the filesystem. + Other destinations ignore this hint. + + file_format (Literal["preferred", ...], optional): Format of the file in which resource data is stored. Useful when importing external files. Use `preferred` to force + a file format that is preferred by the destination used. This setting superseded the `load_file_format` passed to pipeline `run` method. + selected (bool, optional): When `True` `dlt pipeline` will extract and load this resource, if `False`, the resource will be ignored. spec (Type[BaseConfiguration], optional): A specification of configuration and secret values required by the source. 
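
As a hedged, minimal sketch (not part of the patch; the upstream resource and field names are assumptions), a transformer using the hints documented above might be declared like this:

```python
import dlt


@dlt.resource
def order_ids():
    # assumed upstream resource feeding the transformer
    yield from ({"order_id": i} for i in range(3))


# hypothetical transformer using the hints added in this commit;
# file_format="preferred" defers to the destination's preferred loader file format
@dlt.transformer(data_from=order_ids, max_table_nesting=0, file_format="preferred")
def order_details(item):
    yield {"order_id": item["order_id"], "detail": "n/a"}
```

Such a transformer is then passed to `pipeline.run` like any other resource.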
@@ -728,10 +771,14 @@ def transformer( f, name=name, table_name=table_name, + max_table_nesting=max_table_nesting, write_disposition=write_disposition, columns=columns, primary_key=primary_key, merge_key=merge_key, + schema_contract=schema_contract, + table_format=table_format, + file_format=file_format, selected=selected, spec=spec, standalone=standalone, diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 3dd53f5ce4..16aeae5b19 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -8,7 +8,7 @@ from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.exceptions import MissingDependencyException from dlt.common.runtime.collector import Collector, NULL_COLLECTOR -from dlt.common.typing import TDataItems, TDataItem +from dlt.common.typing import TDataItems, TDataItem, TLoaderFileFormat from dlt.common.schema import Schema, utils from dlt.common.schema.typing import ( TSchemaContractDict, @@ -19,7 +19,7 @@ ) from dlt.extract.hints import HintsMeta, TResourceHints from dlt.extract.resource import DltResource -from dlt.extract.items import TableNameMeta +from dlt.extract.items import DataItemWithMeta, TableNameMeta from dlt.extract.storage import ExtractorItemStorage try: @@ -47,20 +47,39 @@ def materialize_schema_item() -> MaterializedEmptyList: class ImportFileMeta(HintsMeta): - __slots__ = ("file_path", "metrics", "with_extension") + __slots__ = ("file_path", "metrics", "file_format") def __init__( self, file_path: str, metrics: DataWriterMetrics, - with_extension: str = None, + file_format: TLoaderFileFormat = None, hints: TResourceHints = None, create_table_variant: bool = None, ) -> None: super().__init__(hints, create_table_variant) self.file_path = file_path self.metrics = metrics - self.with_extension = with_extension + self.file_format = file_format + + +def with_file_import( + item: TDataItems, + file_path: str, + file_format: TLoaderFileFormat = None, + items_count: int = 0, + hints: TResourceHints = None, +) -> DataItemWithMeta: + """Marks `item` to correspond to a file under `file_path` which will be imported into extract storage. `item` may be used + for a schema inference (from arrow table / pandas) but it will not be saved into storage. + + You can provide optional `hints` that will be applied to the current resource. Note that you should avoid schema inference at + runtime if possible and if that is not possible - to do that only once per extract process. Create `TResourceHints` with `make_hints`. + + If number of records in `file_path` is known, pass it in `items_count` so `dlt` can generate correct extract metrics. 
+ """ + metrics = DataWriterMetrics(file_path, items_count, 0, 0, 0) + return DataItemWithMeta(ImportFileMeta(file_path, metrics, file_format, hints, False), item) class Extractor: @@ -157,7 +176,7 @@ def _import_item( table_name, meta.file_path, meta.metrics, - meta.with_extension, + meta.file_format, ) self.collector.update(table_name, inc=metrics.items_count) self.resources_with_items.add(resource_name) diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 6fd1928970..bc10177223 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -5,6 +5,7 @@ from dlt.common.schema.typing import ( TColumnNames, TColumnProp, + TFileFormat, TPartialTableSchema, TTableSchema, TTableSchemaColumns, @@ -48,6 +49,7 @@ class TResourceHints(TypedDict, total=False): incremental: Incremental[Any] schema_contract: TTableHintTemplate[TSchemaContract] table_format: TTableHintTemplate[TTableFormat] + file_format: TTableHintTemplate[TFileFormat] validator: ValidateItem original_columns: TTableHintTemplate[TAnySchemaColumns] @@ -72,6 +74,7 @@ def make_hints( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, ) -> TResourceHints: """A convenience function to create resource hints. Accepts both static and dynamic hints based on data. @@ -91,6 +94,7 @@ def make_hints( columns=clean_columns, # type: ignore schema_contract=schema_contract, # type: ignore table_format=table_format, # type: ignore + file_format=file_format, # type: ignore ) if not table_name: new_template.pop("name") @@ -209,6 +213,7 @@ def apply_hints( schema_contract: TTableHintTemplate[TSchemaContract] = None, additional_table_hints: Optional[Dict[str, TTableHintTemplate[Any]]] = None, table_format: TTableHintTemplate[TTableFormat] = None, + file_format: TTableHintTemplate[TFileFormat] = None, create_table_variant: bool = False, ) -> None: """Creates or modifies existing table schema by setting provided hints. Accepts both static and dynamic hints based on data. 
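
A minimal sketch (not part of the patch; the sample resource is an assumption) of calling the extended `apply_hints` with the new hint:

```python
import dlt


@dlt.resource
def events():
    # assumed sample resource
    yield {"id": 1, "payload": "a"}


# assumed usage of the `file_format` hint introduced above: "preferred" defers the
# choice of loader file format to whatever the destination prefers
events.apply_hints(file_format="preferred")
```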
@@ -256,6 +261,7 @@ def apply_hints( merge_key, schema_contract, table_format, + file_format, ) else: t = self._clone_hints(t) @@ -320,6 +326,11 @@ def apply_hints( t["table_format"] = table_format else: t.pop("table_format", None) + if file_format is not None: + if file_format: + t["file_format"] = file_format + else: + t.pop("file_format", None) # set properties that can't be passed to make_hints if incremental is not None: @@ -375,6 +386,7 @@ def merge_hints( incremental=hints_template.get("incremental"), schema_contract=hints_template.get("schema_contract"), table_format=hints_template.get("table_format"), + file_format=hints_template.get("file_format"), create_table_variant=create_table_variant, ) From 8e0f0a80e37e51a64bc5ed2a3cd21f2b8689b03a Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 22 Jun 2024 00:44:42 +0200 Subject: [PATCH 081/105] docs for importing files, file_format --- .../dlt-ecosystem/destinations/postgres.md | 22 ++++++ .../dlt-ecosystem/destinations/snowflake.md | 23 ++++++ .../docs/dlt-ecosystem/file-formats/csv.md | 9 +++ docs/website/docs/general-usage/resource.md | 79 ++++++++++++++++++- 4 files changed, 129 insertions(+), 4 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md index ae504728c3..4c72f040d0 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md +++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md @@ -105,6 +105,28 @@ The Postgres destination creates UNIQUE indexes by default on columns with the ` create_indexes=false ``` +### Setting up `csv` format +You can provide [non-default](../file-formats/csv.md#default-settings) csv settings via configuration file or explicitly. +```toml +[destination.postgres.csv_format] +delimiter="|" +include_header=false +``` +or +```python +from dlt.destinations import postgres +from dlt.common.data_writers.configuration import CsvFormatConfiguration + +csv_format = CsvFormatConfiguration(delimiter="|", include_header=False) + +dest_ = postgres(csv_format=csv_format) +``` +Above we set `csv` file without header, with **|** as a separator. + +:::tip +You'll need those setting when [importing external files](../../general-usage/resource.md#import-external-files) +::: + ### dbt support This destination [integrates with dbt](../transformations/dbt/dbt.md) via dbt-postgres. diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md index 4642751011..8fab261281 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md +++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md @@ -253,6 +253,29 @@ stage_name="DLT_STAGE" keep_staged_files=true ``` +### Setting up `csv` format +You can provide [non-default](../file-formats/csv.md#default-settings) csv settings via configuration file or explicitly. +```toml +[destination.snowflake.csv_format] +delimiter="|" +include_header=false +on_error_continue=true +``` +or +```python +from dlt.destinations import snowflake +from dlt.common.data_writers.configuration import CsvFormatConfiguration + +csv_format = CsvFormatConfiguration(delimiter="|", include_header=False, on_error_continue=True) + +dest_ = snowflake(csv_format=csv_format) +``` +Above we set `csv` file without header, with **|** as a separator and we request to ignore lines with errors. 
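
As a hedged follow-up to the snippet above (not part of the docs diff; the pipeline name and sample rows are assumptions), the configured factory is used like any other destination:

```python
import dlt

# reuse the `dest_` factory configured with the custom csv_format above
pipeline = dlt.pipeline(pipeline_name="csv_format_demo", destination=dest_)

# rows written as csv will honor the delimiter / header / error settings
load_info = pipeline.run(
    [{"id": 1, "name": "a"}],
    table_name="items",
    loader_file_format="csv",
)
print(load_info)
```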
+
+:::tip
+You'll need those settings when [importing external files](../../general-usage/resource.md#import-external-files)
+:::
+
 ### dbt support
 This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-snowflake](https://github.com/dbt-labs/dbt-snowflake). Both password and key pair authentication are supported and shared with dbt runners.
diff --git a/docs/website/docs/dlt-ecosystem/file-formats/csv.md b/docs/website/docs/dlt-ecosystem/file-formats/csv.md
index a57311b7dc..052fc7195e 100644
--- a/docs/website/docs/dlt-ecosystem/file-formats/csv.md
+++ b/docs/website/docs/dlt-ecosystem/file-formats/csv.md
@@ -59,6 +59,15 @@ NORMALIZE__DATA_WRITER__INCLUDE_HEADER=False
 NORMALIZE__DATA_WRITER__QUOTING=quote_all
 ```
 
+### Destination settings
+A few additional settings are available when copying `csv` to destination tables:
+* **on_error_continue** - skip lines with errors (only Snowflake)
+* **encoding** - encoding of the `csv` file
+
+:::tip
+You'll need those settings when [importing external files](../../general-usage/resource.md#import-external-files)
+:::
+
 ## Limitations
 **arrow writer**
diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md
index ac7f7e6b38..8822410ca0 100644
--- a/docs/website/docs/general-usage/resource.md
+++ b/docs/website/docs/general-usage/resource.md
@@ -488,6 +488,59 @@ be adjusted after the `batch` is processed in the extract pipeline but before an
 You can emit columns as Pydantic model and use dynamic hints (ie. lambda for table name) as well. You should avoid redefining `Incremental` this way.
 :::
 
+### Import external files
+You can import external files ie. `csv`, `parquet` and `jsonl` by yielding items marked with `with_file_import`, optionally passing a table schema corresponding
+to the imported file. `dlt` will not read, parse or normalize any names (ie. `csv` or `arrow` headers) and will attempt to copy the file into the destination as is.
+```python
+import os
+import dlt
+
+from filesystem import filesystem
+
+columns: List[TColumnSchema] = [
+    {"name": "id", "data_type": "bigint"},
+    {"name": "name", "data_type": "text"},
+    {"name": "description", "data_type": "text"},
+    {"name": "ordered_at", "data_type": "date"},
+    {"name": "price", "data_type": "decimal"},
+]
+
+import_folder = "/tmp/import"
+
+@dlt.transformer(columns=columns)
+def orders(items: Iterator[FileItemDict]):
+    for item in items:
+        # copy file locally
+        dest_file = os.path.join(import_folder, item["file_name"])
+        # download file
+        item.fsspec.download(item["file_url"], dest_file)
+        # tell dlt to import the file, mind that `item` below will not be
+        # saved, dest_file will be imported instead
+        yield dlt.mark.with_file_import(item, dest_file, "csv")
+
+
+# use filesystem verified source to glob a bucket
+downloader = filesystem(
+    bucket_url="s3://my_bucket/csv",
+    file_glob="today/*.csv.gz") | orders
+
+info = pipeline.run(downloader, destination="snowflake")
+```
+In the example above, we glob all zipped csv files present on **my_bucket/csv/today** (using the `filesystem` verified source) and send the file descriptors to the `orders` transformer. The transformer downloads the files and imports them into the extract package. At the end, `dlt` sends them to snowflake (the table will be created because we use `column` hints to define the schema).
+
+If imported `csv` files are not in the `dlt` [default format](../dlt-ecosystem/file-formats/csv.md#default-settings), you may need to pass additional configuration.
+```toml
+[destination.snowflake.csv_format]
+delimiter="|"
+include_header=false
+on_error_continue=true
+```
+
+You can sniff the schema from the data, ie. using `duckdb` to infer the table schema from the csv file. `dlt.mark.with_file_import` accepts additional arguments that you can use to pass hints at run time.
+
+:::note
+If you do not define any columns, the table will not be created in the destination. `dlt` will still attempt to load data into it, so if you create a fitting table upfront, the load process will succeed.
+:::
+
 ### Duplicate and rename resources
 There are cases when you your resources are generic (ie. bucket filesystem) and you want to load several instances of it (ie. files from different folders) to separate tables. In example below we use `filesystem` source to load csvs from two different folders into separate tables:
@@ -538,12 +591,30 @@ pipeline.run(generate_rows(10))
 
 # load a list of resources
 pipeline.run([generate_rows(10), generate_rows(20)])
 ```
+
+### Pick loader file format for a particular resource
+You can request a particular loader file format to be used for a resource.
+```python
+@dlt.resource(file_format="parquet")
+def generate_rows(nr):
+    for i in range(nr):
+        yield {'id':i, 'example_string':'abc'}
+```
+The resource above will be saved to and loaded from a `parquet` file (if the destination supports it).
+
+:::note
+A special `file_format` value, **preferred**, loads the resource using the file format preferred by the destination. This setting supersedes the `loader_file_format` passed to the `run` method.
+:::
+
 ### Do a full refresh
-To do a full refresh of an `append` or `merge` resources you temporarily change the write
-disposition to replace. You can use `apply_hints` method of a resource or just provide alternative
-write disposition when loading:
+To do a full refresh of `append` or `merge` resources, set the `refresh` argument of the `run` method to `drop_data`. This will truncate the tables without dropping them.
+ +```py +p.run(merge_source(), refresh="drop_data") +``` +You can also [fully drop the tables](pipeline.md#refresh-pipeline-data-and-state) in the `merge_source`: ```py -p.run(merge_source(), write_disposition="replace") +p.run(merge_source(), refresh="drop_sources") ``` From b1c095cf4c51d95b5be99315eb9df3e8386edb5c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 22 Jun 2024 00:45:17 +0200 Subject: [PATCH 082/105] code improvements and tests --- dlt/common/data_writers/buffered.py | 7 +- dlt/common/data_writers/configuration.py | 31 ++++ dlt/common/data_writers/exceptions.py | 10 ++ dlt/common/data_writers/writers.py | 46 ++---- dlt/common/destination/capabilities.py | 8 +- dlt/common/destination/exceptions.py | 13 ++ dlt/common/destination/reference.py | 3 + dlt/common/schema/typing.py | 4 +- dlt/common/schema/utils.py | 16 +- dlt/common/typing.py | 21 +++ dlt/common/validation.py | 3 +- .../impl/postgres/configuration.py | 6 +- dlt/destinations/impl/postgres/factory.py | 4 + dlt/destinations/impl/postgres/postgres.py | 90 +++++++++-- .../impl/snowflake/configuration.py | 6 +- dlt/destinations/impl/snowflake/factory.py | 3 + dlt/destinations/impl/snowflake/snowflake.py | 11 +- dlt/normalize/items_normalizers.py | 7 + dlt/normalize/worker.py | 26 +++- dlt/pipeline/mark.py | 2 + dlt/pipeline/state_sync.py | 6 +- .../common/data_writers/test_data_writers.py | 13 ++ tests/extract/test_extract.py | 2 + tests/extract/test_sources.py | 6 + tests/load/cases/loading/csv_header.csv | 3 + tests/load/cases/loading/csv_no_header.csv | 2 + tests/load/cases/loading/csv_no_header.csv.gz | Bin 0 -> 90 bytes tests/load/cases/loading/header.jsonl | 2 + tests/load/pipeline/test_csv_loading.py | 96 +++++++++++- tests/load/pipeline/utils.py | 2 +- tests/pipeline/test_arrow_sources.py | 43 ++++++ tests/pipeline/test_pipeline.py | 141 +++++++++++++++++- tests/pipeline/test_pipeline_extra.py | 55 +++++++ 33 files changed, 613 insertions(+), 75 deletions(-) create mode 100644 dlt/common/data_writers/configuration.py create mode 100644 tests/load/cases/loading/csv_header.csv create mode 100644 tests/load/cases/loading/csv_no_header.csv create mode 100644 tests/load/cases/loading/csv_no_header.csv.gz create mode 100644 tests/load/cases/loading/header.jsonl diff --git a/dlt/common/data_writers/buffered.py b/dlt/common/data_writers/buffered.py index bd69ce7c0b..8077007edb 100644 --- a/dlt/common/data_writers/buffered.py +++ b/dlt/common/data_writers/buffered.py @@ -7,6 +7,7 @@ from dlt.common.data_writers.exceptions import ( BufferedDataWriterClosed, DestinationCapabilitiesRequired, + FileImportNotFound, InvalidFileNameTemplateException, ) from dlt.common.data_writers.writers import TWriter, DataWriter, DataWriterMetrics, FileWriterSpec @@ -159,7 +160,11 @@ def import_file( spec = self.writer_spec._replace(file_extension=with_extension) with self.alternative_spec(spec): self._rotate_file() - FileStorage.link_hard_with_fallback(file_path, self._file_name) + try: + FileStorage.link_hard_with_fallback(file_path, self._file_name) + except FileNotFoundError as f_ex: + raise FileImportNotFound(file_path, self._file_name) from f_ex + self._last_modified = time.time() metrics = metrics._replace( file_path=self._file_name, diff --git a/dlt/common/data_writers/configuration.py b/dlt/common/data_writers/configuration.py new file mode 100644 index 0000000000..a837cb47b0 --- /dev/null +++ b/dlt/common/data_writers/configuration.py @@ -0,0 +1,31 @@ +from typing import ClassVar, Literal, Optional +from 
dlt.common.configuration import configspec, known_sections +from dlt.common.configuration.specs import BaseConfiguration + +CsvQuoting = Literal["quote_all", "quote_needed"] + + +@configspec +class CsvFormatConfiguration(BaseConfiguration): + delimiter: str = "," + include_header: bool = True + quoting: CsvQuoting = "quote_needed" + + # read options + on_error_continue: bool = False + encoding: str = "utf-8" + + __section__: ClassVar[str] = known_sections.DATA_WRITER + + +@configspec +class ParquetFormatConfiguration(BaseConfiguration): + flavor: Optional[str] = None # could be ie. "spark" + version: Optional[str] = "2.4" + data_page_size: Optional[int] = None + timestamp_timezone: str = "UTC" + row_group_size: Optional[int] = None + coerce_timestamps: Optional[Literal["s", "ms", "us", "ns"]] = None + allow_truncated_timestamps: bool = False + + __section__: ClassVar[str] = known_sections.DATA_WRITER diff --git a/dlt/common/data_writers/exceptions.py b/dlt/common/data_writers/exceptions.py index 1d5c58f787..3b11ed70fc 100644 --- a/dlt/common/data_writers/exceptions.py +++ b/dlt/common/data_writers/exceptions.py @@ -22,6 +22,16 @@ def __init__(self, file_name: str): super().__init__(f"Writer with recent file name {file_name} is already closed") +class FileImportNotFound(DataWriterException, FileNotFoundError): + def __init__(self, import_file_path: str, local_file_path: str) -> None: + self.import_file_path = import_file_path + self.local_file_path = local_file_path + super().__init__( + f"Attempt to import non existing file {import_file_path} into extract storage file" + f" {local_file_path}" + ) + + class DestinationCapabilitiesRequired(DataWriterException, ValueError): def __init__(self, file_format: TLoaderFileFormat): self.file_format = file_format diff --git a/dlt/common/data_writers/writers.py b/dlt/common/data_writers/writers.py index 059cb6d88d..d324792a83 100644 --- a/dlt/common/data_writers/writers.py +++ b/dlt/common/data_writers/writers.py @@ -4,7 +4,6 @@ IO, TYPE_CHECKING, Any, - ClassVar, Dict, List, Literal, @@ -17,8 +16,7 @@ ) from dlt.common.json import json -from dlt.common.configuration import configspec, known_sections, with_config -from dlt.common.configuration.specs import BaseConfiguration +from dlt.common.configuration import with_config from dlt.common.data_writers.exceptions import ( SpecLookupFailed, DataWriterNotFound, @@ -26,6 +24,11 @@ FileSpecNotFound, InvalidDataItem, ) +from dlt.common.data_writers.configuration import ( + CsvFormatConfiguration, + CsvQuoting, + ParquetFormatConfiguration, +) from dlt.common.destination import ( DestinationCapabilitiesContext, TLoaderFileFormat, @@ -34,6 +37,7 @@ from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.typing import StrAny + if TYPE_CHECKING: from dlt.common.libs.pyarrow import pyarrow as pa @@ -164,6 +168,10 @@ def write_header(self, columns_schema: TTableSchemaColumns) -> None: "ImportFileWriter cannot write any files. You have bug in your code." ) + @classmethod + def writer_spec(cls) -> FileWriterSpec: + raise NotImplementedError("ImportFileWriter has no single spec") + class JsonlWriter(DataWriter): def write_data(self, rows: Sequence[Any]) -> None: @@ -278,21 +286,8 @@ def writer_spec(cls) -> FileWriterSpec: ) -@configspec -class ParquetDataWriterConfiguration(BaseConfiguration): - flavor: Optional[str] = None # could be ie. 
"spark" - version: Optional[str] = "2.4" - data_page_size: Optional[int] = None - timestamp_timezone: str = "UTC" - row_group_size: Optional[int] = None - coerce_timestamps: Optional[Literal["s", "ms", "us", "ns"]] = None - allow_truncated_timestamps: bool = False - - __section__: ClassVar[str] = known_sections.DATA_WRITER - - class ParquetDataWriter(DataWriter): - @with_config(spec=ParquetDataWriterConfiguration) + @with_config(spec=ParquetFormatConfiguration) def __init__( self, f: IO[Any], @@ -399,21 +394,8 @@ def writer_spec(cls) -> FileWriterSpec: ) -CsvQuoting = Literal["quote_all", "quote_needed"] - - -@configspec -class CsvDataWriterConfiguration(BaseConfiguration): - delimiter: str = "," - include_header: bool = True - quoting: CsvQuoting = "quote_needed" - on_error_continue: bool = False - - __section__: ClassVar[str] = known_sections.DATA_WRITER - - class CsvWriter(DataWriter): - @with_config(spec=CsvDataWriterConfiguration) + @with_config(spec=CsvFormatConfiguration) def __init__( self, f: IO[Any], @@ -544,7 +526,7 @@ def writer_spec(cls) -> FileWriterSpec: class ArrowToCsvWriter(DataWriter): - @with_config(spec=CsvDataWriterConfiguration) + @with_config(spec=CsvFormatConfiguration) def __init__( self, f: IO[Any], diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index 58a09e3e39..a972436c68 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -11,7 +11,7 @@ Union, get_args, ) - +from dlt.common.typing import TLoaderFileFormat from dlt.common.configuration.utils import serialize_value from dlt.common.configuration import configspec from dlt.common.configuration.specs import ContainerInjectableContext @@ -24,12 +24,6 @@ from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION -# known loader file formats -# jsonl - new line separated json documents -# typed-jsonl - internal extract -> normalize format bases on jsonl -# insert_values - insert SQL statements -# sql - any sql statement -TLoaderFileFormat = Literal["jsonl", "typed-jsonl", "insert_values", "parquet", "csv"] ALL_SUPPORTED_FILE_FORMATS: Set[TLoaderFileFormat] = set(get_args(TLoaderFileFormat)) diff --git a/dlt/common/destination/exceptions.py b/dlt/common/destination/exceptions.py index cd8f50bcce..c5f30401df 100644 --- a/dlt/common/destination/exceptions.py +++ b/dlt/common/destination/exceptions.py @@ -124,3 +124,16 @@ def __init__(self, schema_name: str, version_hash: str, stored_version_hash: str " schema in load package, you should first save it into schema storage. You can also" " use schema._bump_version() in test code to remove modified flag." 
) + + +class DestinationInvalidFileFormat(DestinationTerminalException): + def __init__( + self, destination_type: str, file_format: str, file_name: str, message: str + ) -> None: + self.destination_type = destination_type + self.file_format = file_format + self.message = message + super().__init__( + f"Destination {destination_type} cannot process file {file_name} with format" + f" {file_format}: {message}" + ) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 4141057196..776a0f9a6f 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -30,6 +30,7 @@ from dlt.common.normalizers.naming import NamingConvention from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.utils import ( + get_file_format, get_write_disposition, get_table_format, ) @@ -374,6 +375,8 @@ def prepare_load_table( table["write_disposition"] = get_write_disposition(self.schema.tables, table_name) if "table_format" not in table: table["table_format"] = get_table_format(self.schema.tables, table_name) + if "file_format" not in table: + table["file_format"] = get_file_format(self.schema.tables, table_name) return table except KeyError: raise UnknownTableException(self.schema.name, table_name) diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index e1bc6f5b78..b5081c5ff4 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -17,7 +17,7 @@ from dlt.common.data_types import TDataType from dlt.common.normalizers.typing import TNormalizersConfig -from dlt.common.typing import TSortOrder, TAnyDateTime +from dlt.common.typing import TSortOrder, TAnyDateTime, TLoaderFileFormat try: from pydantic import BaseModel as _PydanticBaseModel @@ -68,6 +68,7 @@ TWriteDisposition = Literal["skip", "append", "replace", "merge"] TTableFormat = Literal["iceberg", "delta"] +TFileFormat = Literal[Literal["preferred"], TLoaderFileFormat] TTypeDetections = Literal[ "timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double" ] @@ -204,6 +205,7 @@ class TTableSchema(TTableProcessingHints, total=False): columns: TTableSchemaColumns resource: Optional[str] table_format: Optional[TTableFormat] + file_format: Optional[TFileFormat] class TPartialTableSchema(TTableSchema): diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 039392a03e..7ca9265886 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -24,6 +24,7 @@ VERSION_TABLE_NAME, PIPELINE_STATE_TABLE_NAME, TColumnName, + TFileFormat, TPartialTableSchema, TSchemaTables, TSchemaUpdate, @@ -678,6 +679,12 @@ def get_table_format(tables: TSchemaTables, table_name: str) -> TTableFormat: ) +def get_file_format(tables: TSchemaTables, table_name: str) -> TFileFormat: + return cast( + TFileFormat, get_inherited_table_hint(tables, table_name, "file_format", allow_none=True) + ) + + def fill_hints_from_parent_and_clone_table( tables: TSchemaTables, table: TTableSchema ) -> TTableSchema: @@ -689,6 +696,8 @@ def fill_hints_from_parent_and_clone_table( table["write_disposition"] = get_write_disposition(tables, table["name"]) if "table_format" not in table: table["table_format"] = get_table_format(tables, table["name"]) + if "file_format" not in table: + table["file_format"] = get_file_format(tables, table["name"]) return table @@ -800,6 +809,7 @@ def pipeline_state_table() -> TTableSchema: # WARNING: do not reorder the columns table = new_table( PIPELINE_STATE_TABLE_NAME, + 
write_disposition="append", columns=[ {"name": "version", "data_type": "bigint", "nullable": False}, {"name": "engine_version", "data_type": "bigint", "nullable": False}, @@ -809,8 +819,9 @@ def pipeline_state_table() -> TTableSchema: {"name": "version_hash", "data_type": "text", "nullable": True}, {"name": "_dlt_load_id", "data_type": "text", "nullable": False}, ], + # always use caps preferred file format for processing + file_format="preferred", ) - table["write_disposition"] = "append" table["description"] = "Created by DLT. Tracks pipeline state" return table @@ -824,6 +835,7 @@ def new_table( resource: str = None, schema_contract: TSchemaContract = None, table_format: TTableFormat = None, + file_format: TFileFormat = None, ) -> TTableSchema: table: TTableSchema = { "name": table_name, @@ -842,6 +854,8 @@ def new_table( table["schema_contract"] = schema_contract if table_format: table["table_format"] = table_format + if file_format: + table["file_format"] = file_format if validate_schema: validate_dict_ignoring_xkeys( spec=TColumnSchema, diff --git a/dlt/common/typing.py b/dlt/common/typing.py index cd535cdcd2..a34f7941f5 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -106,6 +106,8 @@ VARIANT_FIELD_FORMAT = "v_%s" TFileOrPath = Union[str, PathLike, IO[Any]] TSortOrder = Literal["asc", "desc"] +TLoaderFileFormat = Literal["jsonl", "typed-jsonl", "insert_values", "parquet", "csv"] +"""known loader file formats""" class ConfigValueSentinel(NamedTuple): @@ -258,6 +260,25 @@ def is_literal_type(hint: Type[Any]) -> bool: return False +def get_literal_args(literal: Type[Any]) -> List[Any]: + """Recursively get arguments from nested Literal types and return an unified list.""" + if not hasattr(literal, "__origin__") or literal.__origin__ is not Literal: + raise ValueError("Provided type is not a Literal") + + unified_args = [] + + def _get_args(literal: Type[Any]) -> None: + for arg in get_args(literal): + if hasattr(arg, "__origin__") and arg.__origin__ is Literal: + _get_args(arg) + else: + unified_args.append(arg) + + _get_args(literal) + + return unified_args + + def is_newtype_type(t: Type[Any]) -> bool: if hasattr(t, "__supertype__"): return True diff --git a/dlt/common/validation.py b/dlt/common/validation.py index 0a8bced287..8862c10024 100644 --- a/dlt/common/validation.py +++ b/dlt/common/validation.py @@ -7,6 +7,7 @@ from dlt.common.exceptions import DictValidationException from dlt.common.typing import ( StrAny, + get_literal_args, get_type_name, is_callable_type, is_literal_type, @@ -114,7 +115,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None: failed_validations, ) elif is_literal_type(t): - a_l = get_args(t) + a_l = get_literal_args(t) if pv not in a_l: raise DictValidationException( f"field '{pk}' with value {pv} is not one of: {a_l}", path, t, pk, pv diff --git a/dlt/destinations/impl/postgres/configuration.py b/dlt/destinations/impl/postgres/configuration.py index 0d12abbac7..cd42eef9ec 100644 --- a/dlt/destinations/impl/postgres/configuration.py +++ b/dlt/destinations/impl/postgres/configuration.py @@ -1,6 +1,7 @@ import dataclasses -from typing import Final, ClassVar, Any, List, TYPE_CHECKING, Union +from typing import Final, ClassVar, Any, List, Optional +from dlt.common.data_writers.configuration import CsvFormatConfiguration from dlt.common.libs.sql_alchemy import URL from dlt.common.configuration import configspec from dlt.common.configuration.specs import ConnectionStringCredentials @@ -39,6 +40,9 @@ class 
PostgresClientConfiguration(DestinationClientDwhWithStagingConfiguration): create_indexes: bool = True + csv_format: Optional[CsvFormatConfiguration] = None + """Optional csv format configuration""" + def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" if self.credentials and self.credentials.host: diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index 7260256be9..b873bf97d5 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -1,5 +1,6 @@ import typing as t +from dlt.common.data_writers.configuration import CsvFormatConfiguration from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE @@ -54,6 +55,7 @@ def __init__( self, credentials: t.Union[PostgresCredentials, t.Dict[str, t.Any], str] = None, create_indexes: bool = True, + csv_format: t.Optional[CsvFormatConfiguration] = None, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -66,11 +68,13 @@ def __init__( credentials: Credentials to connect to the postgres database. Can be an instance of `PostgresCredentials` or a connection string in the format `postgres://user:password@host:port/database` create_indexes: Should unique indexes be created + csv_format: Formatting options for csv file format **kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, create_indexes=create_indexes, + csv_format=csv_format, destination_name=destination_name, environment=environment, **kwargs, diff --git a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index 3a90048230..7b173a7711 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -1,5 +1,11 @@ -from typing import ClassVar, Dict, Optional, Sequence, List, Any - +from typing import Dict, Optional, Sequence, List, Any + +from dlt.common import logger +from dlt.common.data_writers.configuration import CsvFormatConfiguration +from dlt.common.destination.exceptions import ( + DestinationInvalidFileFormat, + DestinationTerminalException, +) from dlt.common.destination.reference import FollowupJob, LoadJob, NewLoadJob, TLoadJobState from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.exceptions import TerminalValueError @@ -105,21 +111,85 @@ def generate_sql( class PostgresCsvCopyJob(LoadJob, FollowupJob): - def __init__(self, table_name: str, file_path: str, sql_client: Psycopg2SqlClient) -> None: + def __init__(self, table: TTableSchema, file_path: str, client: "PostgresClient") -> None: super().__init__(FileStorage.get_file_name_from_file_path(file_path)) + config = client.config + sql_client = client.sql_client + csv_format = config.csv_format or CsvFormatConfiguration() + table_name = table["name"] + sep = csv_format.delimiter + if csv_format.on_error_continue: + logger.warning( + f"When processing {file_path} on table {table_name} Postgres csv reader does not" + " support on_error_continue" + ) with FileStorage.open_zipsafe_ro(file_path, "rb") as f: - # all headers in first line - headers = f.readline().decode("utf-8").strip() - # quote headers if not quoted - all special keywords like "binary" must be quoted - headers = ",".join(h if 
h.startswith('"') else f'"{h}"' for h in headers.split(",")) + if csv_format.include_header: + # all headers in first line + headers_row: str = f.readline().decode(csv_format.encoding).strip() + split_headers = headers_row.split(sep) + else: + # read first row to figure out the headers + split_first_row: str = f.readline().decode(csv_format.encoding).strip().split(sep) + split_headers = list(client.schema.get_table_columns(table_name).keys()) + if len(split_first_row) > len(split_headers): + raise DestinationInvalidFileFormat( + "postgres", + "csv", + file_path, + f"First row {split_first_row} has more rows than columns {split_headers} in" + f" table {table_name}", + ) + if len(split_first_row) < len(split_headers): + logger.warning( + f"First row {split_first_row} has less rows than columns {split_headers} in" + f" table {table_name}. We will not load data to superfluous columns." + ) + split_headers = split_headers[: len(split_first_row)] + # stream the first row again + f.seek(0) + + # normalized and quoted headers + split_headers = [ + sql_client.escape_column_name(h.strip('"'), escape=True) for h in split_headers + ] + split_null_headers = [] + split_columns = [] + # detect columns with NULL to use in FORCE NULL + # detect headers that are not in columns + for col in client.schema.get_table_columns(table_name).values(): + norm_col = sql_client.escape_column_name(col["name"], escape=True) + split_columns.append(norm_col) + if norm_col in split_headers and col.get("nullable", True): + split_null_headers.append(norm_col) + split_unknown_headers = set(split_headers).difference(split_columns) + if split_unknown_headers: + raise DestinationInvalidFileFormat( + "postgres", + "csv", + file_path, + f"Following headers {split_unknown_headers} cannot be matched to columns" + f" {split_columns} of table {table_name}.", + ) + + # use comma to join + headers = ",".join(split_headers) + if split_null_headers: + null_headers = f"FORCE_NULL({','.join(split_null_headers)})," + else: + null_headers = "" + qualified_table_name = sql_client.make_qualified_table_name(table_name) copy_sql = ( - "COPY %s (%s) FROM STDIN WITH (FORMAT CSV, DELIMITER ',', NULL '', FORCE_NULL(%s))" + "COPY %s (%s) FROM STDIN WITH (FORMAT CSV, DELIMITER '%s', NULL ''," + " %s ENCODING '%s')" % ( qualified_table_name, headers, - headers, + sep, + null_headers, + csv_format.encoding, ) ) with sql_client.begin_transaction(): @@ -152,7 +222,7 @@ def __init__( def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: job = super().start_file_load(table, file_path, load_id) if not job and file_path.endswith("csv"): - job = PostgresCsvCopyJob(table["name"], file_path, self.sql_client) + job = PostgresCsvCopyJob(table, file_path, self) return job def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: diff --git a/dlt/destinations/impl/snowflake/configuration.py b/dlt/destinations/impl/snowflake/configuration.py index eed4e23b87..7571ccd739 100644 --- a/dlt/destinations/impl/snowflake/configuration.py +++ b/dlt/destinations/impl/snowflake/configuration.py @@ -1,9 +1,9 @@ import dataclasses import base64 -from typing import Final, Optional, Any, Dict, ClassVar, List, TYPE_CHECKING, Union +from typing import Final, Optional, Any, Dict, ClassVar, List from dlt import version -from dlt.common.data_writers.writers import CsvDataWriterConfiguration +from dlt.common.data_writers.configuration import CsvFormatConfiguration from dlt.common.libs.sql_alchemy import URL from 
dlt.common.exceptions import MissingDependencyException from dlt.common.typing import TSecretStrValue @@ -139,7 +139,7 @@ class SnowflakeClientConfiguration(DestinationClientDwhWithStagingConfiguration) keep_staged_files: bool = True """Whether to keep or delete the staged files after COPY INTO succeeds""" - csv_format: Optional[CsvDataWriterConfiguration] = None + csv_format: Optional[CsvFormatConfiguration] = None """Optional csv format configuration""" def fingerprint(self) -> str: diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index 6f06edf213..f531b8704e 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -1,5 +1,6 @@ import typing as t +from dlt.common.data_writers.configuration import CsvFormatConfiguration from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.data_writers.escape import escape_snowflake_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE @@ -53,6 +54,7 @@ def __init__( credentials: t.Union[SnowflakeCredentials, t.Dict[str, t.Any], str] = None, stage_name: t.Optional[str] = None, keep_staged_files: bool = True, + csv_format: t.Optional[CsvFormatConfiguration] = None, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -71,6 +73,7 @@ def __init__( credentials=credentials, stage_name=stage_name, keep_staged_files=keep_staged_files, + csv_format=csv_format, destination_name=destination_name, environment=environment, **kwargs, diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py index 83ae23f752..a7c1a345c0 100644 --- a/dlt/destinations/impl/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -1,7 +1,7 @@ -from typing import ClassVar, Optional, Sequence, Tuple, List, Any +from typing import Optional, Sequence, List from urllib.parse import urlparse, urlunparse -from dlt.common.data_writers.writers import CsvDataWriterConfiguration +from dlt.common.data_writers.configuration import CsvFormatConfiguration from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( FollowupJob, @@ -15,7 +15,6 @@ AwsCredentialsWithoutDefaults, AzureCredentialsWithoutDefaults, ) -from dlt.common.data_types import TDataType from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat @@ -27,10 +26,8 @@ from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient -from dlt.destinations.sql_jobs import SqlJobParams from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient from dlt.destinations.job_impl import NewReferenceJob -from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper @@ -182,12 +179,12 @@ def __init__( ) if file_name.endswith("csv"): # empty strings are NULL, no data is NULL, missing columns (ERROR_ON_COLUMN_COUNT_MISMATCH) are NULL - csv_format = config.csv_format or CsvDataWriterConfiguration() + csv_format = config.csv_format or CsvFormatConfiguration() source_format = ( "(TYPE = 'CSV', BINARY_FORMAT = 'UTF-8', PARSE_HEADER =" f" {csv_format.include_header}, FIELD_OPTIONALLY_ENCLOSED_BY = '\"', NULL_IF =" " 
(''), ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE," - f" FIELD_DELIMITER='{csv_format.delimiter}')" + f" FIELD_DELIMITER='{csv_format.delimiter}', ENCODING='{csv_format.encoding}')" ) # disable column match if headers are not provided if not csv_format.include_header: diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index e4408a83d7..d623e6e809 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -415,6 +415,13 @@ def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSch f"Table {root_table_name} {self.item_storage.writer_spec.file_format} file" f" {extracted_items_file} will be directly imported without normalization" ) + completed_columns = self.schema.get_table_columns(root_table_name) + if not completed_columns: + logger.warning( + f"Table {root_table_name} has no completed columns for imported file" + f" {extracted_items_file} and will not be created! Pass column hints to the" + " resource or with dlt.mark.with_hints or create the destination table yourself." + ) with self.normalize_storage.extracted_packages.storage.open_file( extracted_items_file, "rb" ) as f: diff --git a/dlt/normalize/worker.py b/dlt/normalize/worker.py index 1338dac984..7476066fe7 100644 --- a/dlt/normalize/worker.py +++ b/dlt/normalize/worker.py @@ -81,7 +81,7 @@ def w_normalize_files( load_storage = LoadStorage(False, supported_file_formats, loader_storage_config) def _get_items_normalizer( - parsed_file_name: ParsedLoadJobFileName, table_schema: Optional[TTableSchema] + parsed_file_name: ParsedLoadJobFileName, table_schema: TTableSchema ) -> ItemsNormalizer: item_format = DataWriter.item_format_from_file_extension(parsed_file_name.file_format) @@ -121,17 +121,31 @@ def _get_items_normalizer( parsed_file_name.file_format, items_supported_file_formats # type: ignore[arg-type] ) - if config.loader_file_format and best_writer_spec is None: + config_loader_file_format = config.loader_file_format + if table_schema.get("file_format"): + # resource has a file format defined so use it + if table_schema["file_format"] == "preferred": + # use destination preferred + config_loader_file_format = items_preferred_file_format + else: + # use resource format + config_loader_file_format = table_schema["file_format"] + logger.info( + f"A file format for table {table_name} was specified in the resource so" + f" {config_loader_file_format} format being used." + ) + + if config_loader_file_format and best_writer_spec is None: # force file format - if config.loader_file_format in items_supported_file_formats: + if config_loader_file_format in items_supported_file_formats: # TODO: pass supported_file_formats, when used in pipeline we already checked that # but if normalize is used standalone `supported_loader_file_formats` may be unresolved - best_writer_spec = get_best_writer_spec(item_format, config.loader_file_format) + best_writer_spec = get_best_writer_spec(item_format, config_loader_file_format) else: logger.warning( - f"The configured value `{config.loader_file_format}` " + f"The configured value `{config_loader_file_format}` " "for `loader_file_format` is not supported for table " - f"`{table_schema['name']}` and will be ignored. Dlt " + f"`{table_name}` and will be ignored. Dlt " "will use a supported format instead." 
) diff --git a/dlt/pipeline/mark.py b/dlt/pipeline/mark.py index 3956d9bbe2..aae77d0b3f 100644 --- a/dlt/pipeline/mark.py +++ b/dlt/pipeline/mark.py @@ -2,6 +2,8 @@ from dlt.extract import ( with_table_name, with_hints, + with_file_import, make_hints, + with_file_import, materialize_schema_item as materialize_table_schema, ) diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index d0164946f7..26ff6ddb54 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -100,12 +100,14 @@ def state_doc(state: TPipelineState, load_id: str = None) -> TPipelineStateDoc: def state_resource(state: TPipelineState, load_id: str) -> Tuple[DltResource, TPipelineStateDoc]: doc = state_doc(state, load_id) + state_table = pipeline_state_table() return ( dlt.resource( [doc], name=PIPELINE_STATE_TABLE_NAME, - write_disposition="append", - columns=pipeline_state_table()["columns"], + write_disposition=state_table["write_disposition"], + file_format=state_table["file_format"], + columns=state_table["columns"], ), doc, ) diff --git a/tests/common/data_writers/test_data_writers.py b/tests/common/data_writers/test_data_writers.py index cd5ff2eaf8..9b4e61a2f7 100644 --- a/tests/common/data_writers/test_data_writers.py +++ b/tests/common/data_writers/test_data_writers.py @@ -27,8 +27,10 @@ DataWriter, DataWriterMetrics, EMPTY_DATA_WRITER_METRICS, + ImportFileWriter, InsertValuesWriter, JsonlWriter, + create_import_spec, get_best_writer_spec, resolve_best_writer_spec, is_native_writer, @@ -259,3 +261,14 @@ def test_get_best_writer() -> None: assert WRITER_SPECS[get_best_writer_spec("arrow", "insert_values")] == ArrowToInsertValuesWriter with pytest.raises(DataWriterNotFound): get_best_writer_spec("arrow", "tsv") # type: ignore + + +def test_import_file_writer() -> None: + spec = create_import_spec("jsonl", ["jsonl"]) + assert spec.data_item_format == "file" + assert spec.file_format == "jsonl" + writer = DataWriter.writer_class_from_spec(spec) + assert writer is ImportFileWriter + w_ = writer(None) + with pytest.raises(NotImplementedError): + w_.write_header(None) diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index dc978b997a..dbec417f97 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -125,6 +125,7 @@ def with_table_hints(): {"id": 1, "pk2": "B"}, make_hints( write_disposition="merge", + file_format="preferred", columns=[{"name": "id", "precision": 16}, {"name": "text", "data_type": "decimal"}], primary_key="pk2", ), @@ -143,6 +144,7 @@ def with_table_hints(): assert "pk" in table["columns"] assert "text" in table["columns"] assert table["write_disposition"] == "merge" + assert table["file_format"] == "preferred" # make table name dynamic yield dlt.mark.with_hints( diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index e40b03219d..172576e3c8 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -1266,6 +1266,8 @@ def empty_gen(): primary_key=["a", "b"], merge_key=["c", "a"], schema_contract="freeze", + table_format="delta", + file_format="jsonl", ) table = empty_r.compute_table_schema() assert table["columns"]["a"] == { @@ -1280,11 +1282,15 @@ def empty_gen(): assert table["parent"] == "parent" assert empty_r.table_name == "table" assert table["schema_contract"] == "freeze" + assert table["table_format"] == "delta" + assert table["file_format"] == "jsonl" # reset empty_r.apply_hints( table_name="", parent_table_name="", + table_format="", + file_format="", 
primary_key=[], merge_key="", columns={}, diff --git a/tests/load/cases/loading/csv_header.csv b/tests/load/cases/loading/csv_header.csv new file mode 100644 index 0000000000..14c7514e51 --- /dev/null +++ b/tests/load/cases/loading/csv_header.csv @@ -0,0 +1,3 @@ +id|name|description|ordered_at|price +1|item|value|2024-04-12|128.4 +1|"item"|value with space|2024-04-12|128.4 \ No newline at end of file diff --git a/tests/load/cases/loading/csv_no_header.csv b/tests/load/cases/loading/csv_no_header.csv new file mode 100644 index 0000000000..1e3a63494e --- /dev/null +++ b/tests/load/cases/loading/csv_no_header.csv @@ -0,0 +1,2 @@ +1|item|value|2024-04-12|128.4 +1|"item"|value with space|2024-04-12|128.4 \ No newline at end of file diff --git a/tests/load/cases/loading/csv_no_header.csv.gz b/tests/load/cases/loading/csv_no_header.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..310950f4840a3721a2206cabd29d8bface3f8286 GIT binary patch literal 90 zcmb2|=HO`BS(?VcoLpQMpO+t>k(!v2S`=@j2jnvtuQ{pXd3LR)_Zdy!wT4CpMi)&D uE}ELGGBnwA!T6!knm{H`ZO12~r;NRgM7p?zCL2y=VvwyDNcCV~U;qHmn;y#m literal 0 HcmV?d00001 diff --git a/tests/load/cases/loading/header.jsonl b/tests/load/cases/loading/header.jsonl new file mode 100644 index 0000000000..c2f9fee551 --- /dev/null +++ b/tests/load/cases/loading/header.jsonl @@ -0,0 +1,2 @@ +{"id": 1, "name": "item", "description": "value", "ordered_at": "2024-04-12", "price": 128.4} +{"id": 1, "name": "item", "description": "value with space", "ordered_at": "2024-04-12", "price": 128.4} \ No newline at end of file diff --git a/tests/load/pipeline/test_csv_loading.py b/tests/load/pipeline/test_csv_loading.py index 3f83bc9d5e..7e83978afc 100644 --- a/tests/load/pipeline/test_csv_loading.py +++ b/tests/load/pipeline/test_csv_loading.py @@ -1,11 +1,21 @@ import os +from typing import List import pytest +import dlt +from dlt.common.data_writers.configuration import CsvFormatConfiguration +from dlt.common.schema.typing import TColumnSchema +from dlt.common.typing import TLoaderFileFormat from dlt.common.utils import uniq_id from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration from tests.cases import arrow_table_all_data_types, prepare_shuffled_tables -from tests.pipeline.utils import assert_data_table_counts, assert_load_info, load_tables_to_dicts +from tests.pipeline.utils import ( + assert_data_table_counts, + assert_load_info, + assert_only_table_columns, + load_tables_to_dicts, +) from tests.utils import TestDataItemFormat @@ -42,6 +52,90 @@ def test_load_csv( load_tables_to_dicts(pipeline, "table") +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("file_format", (None, "csv")) +@pytest.mark.parametrize("compression", (True, False)) +def test_custom_csv_no_header( + destination_config: DestinationTestConfiguration, + file_format: TLoaderFileFormat, + compression: bool, +) -> None: + os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = str(not compression) + csv_format = CsvFormatConfiguration(delimiter="|", include_header=False) + # apply to collected config + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + # this will apply this to config when client instance is created + pipeline.destination.config_params["csv_format"] = csv_format + # verify + assert pipeline.destination_client().config.csv_format == csv_format # type: ignore[attr-defined] + # 
create a resource that imports file + + columns: List[TColumnSchema] = [ + {"name": "id", "data_type": "bigint"}, + {"name": "name", "data_type": "text"}, + {"name": "description", "data_type": "text"}, + {"name": "ordered_at", "data_type": "date"}, + {"name": "price", "data_type": "decimal"}, + ] + hints = dlt.mark.make_hints(columns=columns) + import_file = "tests/load/cases/loading/csv_no_header.csv" + if compression: + import_file += ".gz" + info = pipeline.run( + [dlt.mark.with_file_import(None, import_file, "csv", 2, hints)], + table_name="no_header", + loader_file_format=file_format, + ) + info.raise_on_failed_jobs() + print(info) + assert_only_table_columns(pipeline, "no_header", [col["name"] for col in columns]) + rows = load_tables_to_dicts(pipeline, "no_header") + assert len(rows["no_header"]) == 2 + # we should have twp files loaded + jobs = info.load_packages[0].jobs["completed_jobs"] + assert len(jobs) == 2 + job_extensions = [os.path.splitext(job.job_file_info.file_name())[1] for job in jobs] + assert ".csv" in job_extensions + assert ".insert_values" in job_extensions + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), + ids=lambda x: x.name, +) +def test_custom_wrong_header(destination_config: DestinationTestConfiguration) -> None: + csv_format = CsvFormatConfiguration(delimiter="|", include_header=True) + # apply to collected config + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + # this will apply this to config when client instance is created + pipeline.destination.config_params["csv_format"] = csv_format + # verify + assert pipeline.destination_client().config.csv_format == csv_format # type: ignore[attr-defined] + # create a resource that imports file + + columns: List[TColumnSchema] = [ + {"name": "object_id", "data_type": "bigint", "nullable": False}, + {"name": "name", "data_type": "text"}, + {"name": "description", "data_type": "text"}, + {"name": "ordered_at", "data_type": "date"}, + {"name": "price", "data_type": "decimal"}, + ] + hints = dlt.mark.make_hints(columns=columns) + import_file = "tests/load/cases/loading/csv_header.csv" + # snowflake will pass here because we do not match + info = pipeline.run( + [dlt.mark.with_file_import(None, import_file, "csv", 2, hints)], + table_name="no_header", + ) + assert info.has_failed_jobs + assert len(info.load_packages[0].jobs["failed_jobs"]) == 1 + + @pytest.mark.parametrize( "destination_config", destinations_configs(default_sql_configs=True, subset=["postgres", "snowflake"]), diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py index 14a0dfb550..dcf115f7e7 100644 --- a/tests/load/pipeline/utils.py +++ b/tests/load/pipeline/utils.py @@ -1,4 +1,4 @@ -from typing import Any, Iterator, List, Sequence, TYPE_CHECKING, Callable +from typing import Iterator, TYPE_CHECKING import pytest import dlt diff --git a/tests/pipeline/test_arrow_sources.py b/tests/pipeline/test_arrow_sources.py index 667f26476b..0724e54f54 100644 --- a/tests/pipeline/test_arrow_sources.py +++ b/tests/pipeline/test_arrow_sources.py @@ -17,6 +17,7 @@ arrow_table_all_data_types, prepare_shuffled_tables, ) +from tests.pipeline.utils import assert_only_table_columns, load_tables_to_dicts from tests.utils import ( preserve_environ, TPythonTableFormat, @@ -505,3 +506,45 @@ def test_empty_arrow(item_type: TPythonTableFormat) -> None: assert len(pipeline.list_extracted_resources()) == 1 norm_info = 
pipeline.normalize() assert norm_info.row_counts["items"] == 0 + + +def test_import_file_with_arrow_schema() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + destination="duckdb", + full_refresh=True, + ) + + # Define the schema based on the CSV input + schema = pa.schema( + [ + ("id", pa.int64()), + ("name", pa.string()), + ("description", pa.string()), + ("ordered_at", pa.date32()), + ("price", pa.float64()), + ] + ) + + # Create empty arrays for each field + empty_arrays = [ + pa.array([], type=pa.int64()), + pa.array([], type=pa.string()), + pa.array([], type=pa.string()), + pa.array([], type=pa.date32()), + pa.array([], type=pa.float64()), + ] + + # Create an empty table with the defined schema + empty_table = pa.Table.from_arrays(empty_arrays, schema=schema) + + # columns should be created from empty table + import_file = "tests/load/cases/loading/header.jsonl" + info = pipeline.run( + [dlt.mark.with_file_import(empty_table, import_file, "jsonl", 2)], + table_name="no_header", + ) + info.raise_on_failed_jobs() + assert_only_table_columns(pipeline, "no_header", schema.names) + rows = load_tables_to_dicts(pipeline, "no_header") + assert len(rows["no_header"]) == 2 diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 7c352d6844..7a122e70a0 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -7,7 +7,7 @@ import random import threading from time import sleep -from typing import Any, Tuple, cast +from typing import Any, List, Tuple, cast from tenacity import retry_if_exception, Retrying, stop_after_attempt import pytest @@ -19,6 +19,7 @@ from dlt.common.configuration.specs.aws_credentials import AwsCredentials from dlt.common.configuration.specs.exceptions import NativeValueError from dlt.common.configuration.specs.gcp_credentials import GcpOAuthCredentials +from dlt.common.data_writers.exceptions import FileImportNotFound, SpecLookupFailed from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import WithStateSync from dlt.common.destination.exceptions import ( @@ -32,6 +33,7 @@ from dlt.common.exceptions import PipelineStateNotAvailable from dlt.common.pipeline import LoadInfo, PipelineContext from dlt.common.runtime.collector import LogCollector +from dlt.common.schema.typing import TColumnSchema from dlt.common.schema.utils import new_column, new_table from dlt.common.typing import DictStrAny from dlt.common.utils import uniq_id @@ -44,9 +46,11 @@ from dlt.extract import DltResource, DltSource from dlt.extract.extractors import MaterializedEmptyList from dlt.load.exceptions import LoadClientJobFailed +from dlt.normalize.exceptions import NormalizeJobFailed from dlt.pipeline.exceptions import InvalidPipelineName, PipelineNotActive, PipelineStepFailed from dlt.pipeline.helpers import retry_load +from dlt.pipeline.pipeline import Pipeline from tests.common.utils import TEST_SENTRY_DSN from tests.common.configuration.utils import environment from tests.utils import TEST_STORAGE_ROOT, skipifnotwindows @@ -55,7 +59,9 @@ assert_data_table_counts, assert_load_info, airtable_emojis, + assert_only_table_columns, load_data_table_counts, + load_tables_to_dicts, many_delayed, ) @@ -2262,3 +2268,136 @@ def test_data(): with client.execute_query(f"SELECT * FROM {pipeline.dataset_name}.staging_cleared") as cur: assert len(cur.fetchall()) == 3 + + +def test_import_jsonl_file() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + 
destination="duckdb", + full_refresh=True, + ) + columns: List[TColumnSchema] = [ + {"name": "id", "data_type": "bigint", "nullable": False}, + {"name": "name", "data_type": "text"}, + {"name": "description", "data_type": "text"}, + {"name": "ordered_at", "data_type": "date"}, + {"name": "price", "data_type": "decimal"}, + ] + import_file = "tests/load/cases/loading/header.jsonl" + info = pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + table_name="no_header", + loader_file_format="jsonl", + columns=columns, + ) + info.raise_on_failed_jobs() + print(info) + assert_imported_file(pipeline, "no_header", columns, 2) + + # use hints to infer + hints = dlt.mark.make_hints(columns=columns) + info = pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2, hints=hints)], + table_name="no_header_2", + ) + info.raise_on_failed_jobs() + assert_imported_file(pipeline, "no_header_2", columns, 2, expects_state=False) + + +def test_import_file_without_sniff_schema() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + destination="duckdb", + full_refresh=True, + ) + import_file = "tests/load/cases/loading/header.jsonl" + info = pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + table_name="no_header", + ) + assert info.has_failed_jobs + print(info) + + +def test_import_non_existing_file() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + destination="duckdb", + full_refresh=True, + ) + # this file does not exist + import_file = "tests/load/cases/loading/X_header.jsonl" + with pytest.raises(PipelineStepFailed) as pip_ex: + pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + table_name="no_header", + ) + inner_ex = pip_ex.value.__cause__ + assert isinstance(inner_ex, FileImportNotFound) + assert inner_ex.import_file_path == import_file + + +def test_import_unsupported_file_format() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + destination="duckdb", + full_refresh=True, + ) + # this file does not exist + import_file = "tests/load/cases/loading/csv_no_header.csv" + with pytest.raises(PipelineStepFailed) as pip_ex: + pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "csv", 2)], + table_name="no_header", + ) + inner_ex = pip_ex.value.__cause__ + assert isinstance(inner_ex, NormalizeJobFailed) + assert isinstance(inner_ex.__cause__, SpecLookupFailed) + + +def test_import_unknown_file_format() -> None: + pipeline = dlt.pipeline( + pipeline_name="test_jsonl_import", + destination="duckdb", + full_refresh=True, + ) + # this file does not exist + import_file = "tests/load/cases/loading/csv_no_header.csv" + with pytest.raises(PipelineStepFailed) as pip_ex: + pipeline.run( + [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "unknown", 2)], # type: ignore[arg-type] + table_name="no_header", + ) + inner_ex = pip_ex.value.__cause__ + assert isinstance(inner_ex, NormalizeJobFailed) + # can't figure format from extension + assert isinstance(inner_ex.__cause__, ValueError) + + +def assert_imported_file( + pipeline: Pipeline, + table_name: str, + columns: List[TColumnSchema], + expected_rows: int, + expects_state: bool = True, +) -> None: + assert_only_table_columns(pipeline, table_name, [col["name"] for col in columns]) + rows = load_tables_to_dicts(pipeline, table_name) + assert len(rows[table_name]) == expected_rows + # we should have twp files loaded + 
jobs = pipeline.last_trace.last_load_info.load_packages[0].jobs["completed_jobs"] + job_extensions = [os.path.splitext(job.job_file_info.file_name())[1] for job in jobs] + assert ".jsonl" in job_extensions + if expects_state: + assert ".insert_values" in job_extensions + # check extract trace if jsonl is really there + extract_info = pipeline.last_trace.last_extract_info + jobs = extract_info.load_packages[0].jobs["new_jobs"] + # find jsonl job + jsonl_job = next(job for job in jobs if job.job_file_info.table_name == table_name) + assert jsonl_job.job_file_info.file_format == "jsonl" + # find metrics for table + assert ( + extract_info.metrics[extract_info.loads_ids[0]][0]["table_metrics"][table_name].items_count + == expected_rows + ) diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py index 2a39865b8a..6ffc8c9707 100644 --- a/tests/pipeline/test_pipeline_extra.py +++ b/tests/pipeline/test_pipeline_extra.py @@ -470,6 +470,61 @@ def users(): assert set(table.schema.names) == {"id", "name", "_dlt_load_id", "_dlt_id"} +def test_resource_file_format() -> None: + os.environ["RESTORE_FROM_DESTINATION"] = "False" + + def jsonl_data(): + yield [ + { + "id": 1, + "name": "item", + "description": "value", + "ordered_at": "2024-04-12", + "price": 128.4, + }, + { + "id": 1, + "name": "item", + "description": "value with space", + "ordered_at": "2024-04-12", + "price": 128.4, + }, + ] + + # preferred file format will use destination preferred format + jsonl_preferred = dlt.resource(jsonl_data, file_format="preferred", name="jsonl_preferred") + assert jsonl_preferred.compute_table_schema()["file_format"] == "preferred" + + jsonl_r = dlt.resource(jsonl_data, file_format="jsonl", name="jsonl_r") + assert jsonl_r.compute_table_schema()["file_format"] == "jsonl" + + jsonl_pq = dlt.resource(jsonl_data, file_format="parquet", name="jsonl_pq") + assert jsonl_pq.compute_table_schema()["file_format"] == "parquet" + + info = dlt.pipeline("example", destination="duckdb").run([jsonl_preferred, jsonl_r, jsonl_pq]) + info.raise_on_failed_jobs() + # check file types on load jobs + load_jobs = { + job.job_file_info.table_name: job.job_file_info + for job in info.load_packages[0].jobs["completed_jobs"] + } + assert load_jobs["jsonl_r"].file_format == "jsonl" + assert load_jobs["jsonl_pq"].file_format == "parquet" + assert load_jobs["jsonl_preferred"].file_format == "insert_values" + + # test not supported format + csv_r = dlt.resource(jsonl_data, file_format="csv", name="csv_r") + assert csv_r.compute_table_schema()["file_format"] == "csv" + info = dlt.pipeline("example", destination="duckdb").run(csv_r) + info.raise_on_failed_jobs() + # fallback to preferred + load_jobs = { + job.job_file_info.table_name: job.job_file_info + for job in info.load_packages[0].jobs["completed_jobs"] + } + assert load_jobs["csv_r"].file_format == "insert_values" + + def test_pick_matching_file_format(test_storage: FileStorage) -> None: from dlt.destinations import filesystem From 46ec73219c8e701863c12e9972c9794a0293b003 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 22 Jun 2024 00:47:30 +0200 Subject: [PATCH 083/105] docs hard links note --- docs/website/docs/general-usage/resource.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 8822410ca0..1439c7b5b3 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -539,7 +539,8 
@@ on_error_continue=true
 You can sniff the schema from the data ie. using `duckdb` to infer the table schema from csv file. `dlt.mark.with_file_import` accepts additional arguments that you can use to pass hints at run time.
 :::note
-If you do not define any columns, the table will not be created in the destination. `dlt` will still attempt to load data into it, so you create a fitting table upfront, the load process will succeed.
+* If you do not define any columns, the table will not be created in the destination. `dlt` will still attempt to load data into it, so if you create a fitting table upfront, the load process will succeed.
+* Files are imported using hard links if possible.
 :::
 
 ### Duplicate and rename resources

From b00cbb289e95f68172253aed1652b63738a2890e Mon Sep 17 00:00:00 2001
From: Marcin Rudolf
Date: Sun, 23 Jun 2024 14:05:23 +0200
Subject: [PATCH 084/105] moves loader parallelism test to pipelines, solves duckdb ci test error issue

---
 tests/load/{ => pipeline}/test_parallelism.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/load/{ => pipeline}/test_parallelism.py (100%)

diff --git a/tests/load/test_parallelism.py b/tests/load/pipeline/test_parallelism.py
similarity index 100%
rename from tests/load/test_parallelism.py
rename to tests/load/pipeline/test_parallelism.py

From a5303453b501f6e464d55311bcefb3be1c0eb131 Mon Sep 17 00:00:00 2001
From: Marcin Rudolf
Date: Sun, 23 Jun 2024 14:05:50 +0200
Subject: [PATCH 085/105] fixes tests

---
 dlt/destinations/impl/snowflake/snowflake.py | 5 +++--
 dlt/normalize/worker.py | 10 +++++-----
 tests/load/pipeline/test_csv_loading.py | 8 ++++++--
 tests/load/pipeline/test_merge_disposition.py | 2 ++
 tests/load/pipeline/test_stage_loading.py | 7 ++++++-
 tests/load/snowflake/test_snowflake_configuration.py | 8 ++++----
 6 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/dlt/destinations/impl/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py
index a7c1a345c0..2a5671b7e7 100644
--- a/dlt/destinations/impl/snowflake/snowflake.py
+++ b/dlt/destinations/impl/snowflake/snowflake.py
@@ -174,8 +174,9 @@ def __init__(
             source_format = "( TYPE = 'JSON', BINARY_FORMAT = 'BASE64' )"
             if file_name.endswith("parquet"):
                 source_format = (
-                    "(TYPE = 'PARQUET', BINARY_AS_TEXT = FALSE, USE_LOGICAL_TYPE = TRUE,"
-                    " USE_VECTORIZED_SCANNER = TRUE)"
+                    "(TYPE = 'PARQUET', BINARY_AS_TEXT = FALSE, USE_LOGICAL_TYPE = TRUE)"
+                    # TODO: USE_VECTORIZED_SCANNER inserts null strings into VARIANT JSON
+                    # " USE_VECTORIZED_SCANNER = TRUE)"
                 )
             if file_name.endswith("csv"):
                 # empty strings are NULL, no data is NULL, missing columns (ERROR_ON_COLUMN_COUNT_MISMATCH) are NULL
diff --git a/dlt/normalize/worker.py b/dlt/normalize/worker.py
index 7476066fe7..d5d4a028d9 100644
--- a/dlt/normalize/worker.py
+++ b/dlt/normalize/worker.py
@@ -122,17 +122,17 @@ def _get_items_normalizer(
             )
 
         config_loader_file_format = config.loader_file_format
-        if table_schema.get("file_format"):
+        if file_format := table_schema.get("file_format"):
             # resource has a file format defined so use it
-            if table_schema["file_format"] == "preferred":
+            if file_format == "preferred":
                 # use destination preferred
                 config_loader_file_format = items_preferred_file_format
             else:
                 # use resource format
-                config_loader_file_format = table_schema["file_format"]
+                config_loader_file_format = file_format
                 logger.info(
-                    f"A file format for table {table_name} was specified in the resource so"
-                    f" {config_loader_file_format} format being used."
+ f"A file format for table {table_name} was specified to {file_format} in the" + f" resource so {config_loader_file_format} format being used." ) if config_loader_file_format and best_writer_spec is None: diff --git a/tests/load/pipeline/test_csv_loading.py b/tests/load/pipeline/test_csv_loading.py index 7e83978afc..c8bdfea3cb 100644 --- a/tests/load/pipeline/test_csv_loading.py +++ b/tests/load/pipeline/test_csv_loading.py @@ -30,8 +30,10 @@ def test_load_csv( ) -> None: os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) - table, shuffled_table, shuffled_removed_column = prepare_shuffled_tables() + # do not save state so the state job is not created + pipeline.config.restore_from_destination = False + table, shuffled_table, shuffled_removed_column = prepare_shuffled_tables() # convert to pylist when loading from objects, this will kick the csv-reader in if item_type == "object": table, shuffled_table, shuffled_removed_column = ( @@ -100,7 +102,9 @@ def test_custom_csv_no_header( assert len(jobs) == 2 job_extensions = [os.path.splitext(job.job_file_info.file_name())[1] for job in jobs] assert ".csv" in job_extensions - assert ".insert_values" in job_extensions + # we allow state to be saved to make sure it is not in csv format (which would broke) + # the loading. state is always saved in destination preferred format + assert pipeline.destination.capabilities().preferred_loader_file_format in job_extensions @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index a68e81ca97..732f0258c8 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -321,6 +321,8 @@ def test_merge_keys_non_existing_columns(destination_config: DestinationTestConf ) def test_pipeline_load_parquet(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_3", dev_mode=True) + # do not save state to destination so jobs counting is easier + p.config.restore_from_destination = False github_data = github() # generate some complex types github_data.max_table_nesting = 2 diff --git a/tests/load/pipeline/test_stage_loading.py b/tests/load/pipeline/test_stage_loading.py index e0e2154b57..42e67345ac 100644 --- a/tests/load/pipeline/test_stage_loading.py +++ b/tests/load/pipeline/test_stage_loading.py @@ -65,12 +65,17 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: ) == 4 ) + # pipeline state is loaded with preferred format, so allows (possibly) for two job formats + caps = pipeline.destination.capabilities() + # NOTE: preferred_staging_file_format goes first because here we test staged loading and + # default caps will be modified so preferred_staging_file_format is used as main + preferred_format = caps.preferred_staging_file_format or caps.preferred_loader_file_format assert ( len( [ x for x in package_info.jobs["completed_jobs"] - if x.job_file_info.file_format == destination_config.file_format + if x.job_file_info.file_format in (destination_config.file_format, preferred_format) ] ) == 4 diff --git a/tests/load/snowflake/test_snowflake_configuration.py b/tests/load/snowflake/test_snowflake_configuration.py index 691f0b5a64..10d93d104c 100644 --- a/tests/load/snowflake/test_snowflake_configuration.py +++ b/tests/load/snowflake/test_snowflake_configuration.py @@ -121,10 +121,10 @@ def test_only_authenticator() -> 
None: } -def test_no_query(environment) -> None: - c = SnowflakeCredentials("snowflake://user1:pass1@host1/db1") - assert str(c.to_url()) == "snowflake://user1:pass1@host1/db1" - print(c.to_url()) +# def test_no_query(environment) -> None: +# c = SnowflakeCredentials("snowflake://user1:pass1@host1/db1") +# assert str(c.to_url()) == "snowflake://user1:pass1@host1/db1" +# print(c.to_url()) def test_query_additional_params() -> None: From 4271895116058417741b241691ba35df885bcfb7 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sun, 23 Jun 2024 14:46:31 +0200 Subject: [PATCH 086/105] moves drop_pipeline fixture level up --- .../athena_iceberg/test_athena_iceberg.py | 8 +-- .../bigquery/test_bigquery_table_builder.py | 6 +- tests/load/conftest.py | 2 +- tests/load/duckdb/test_duckdb_client.py | 1 - tests/load/pipeline/conftest.py | 2 +- tests/load/pipeline/test_athena.py | 4 +- tests/load/pipeline/test_bigquery.py | 3 +- tests/load/pipeline/test_clickhouse.py | 5 +- tests/load/pipeline/test_csv_loading.py | 5 +- tests/load/pipeline/test_dbt_helper.py | 2 +- tests/load/pipeline/test_drop.py | 2 +- tests/load/pipeline/test_duckdb.py | 5 +- .../load/pipeline/test_filesystem_pipeline.py | 2 +- tests/load/pipeline/test_merge_disposition.py | 7 +- tests/load/pipeline/test_pipelines.py | 9 ++- tests/load/pipeline/test_postgres.py | 2 +- tests/load/pipeline/test_redshift.py | 2 +- .../load/pipeline/test_replace_disposition.py | 4 +- tests/load/pipeline/test_restore_state.py | 2 +- tests/load/pipeline/test_scd2.py | 5 +- tests/load/pipeline/test_stage_loading.py | 8 +-- .../test_write_disposition_changes.py | 2 +- tests/load/pipeline/utils.py | 68 ------------------- .../synapse/test_synapse_table_indexing.py | 3 - tests/load/test_insert_job_client.py | 8 ++- tests/load/test_job_client.py | 3 +- tests/load/test_sql_client.py | 8 ++- tests/load/utils.py | 55 +++++++++++++++ 28 files changed, 107 insertions(+), 126 deletions(-) diff --git a/tests/load/athena_iceberg/test_athena_iceberg.py b/tests/load/athena_iceberg/test_athena_iceberg.py index 4fe01752ee..0ef935a8bc 100644 --- a/tests/load/athena_iceberg/test_athena_iceberg.py +++ b/tests/load/athena_iceberg/test_athena_iceberg.py @@ -1,15 +1,9 @@ import pytest import os -import datetime # noqa: I251 from typing import Iterator, Any import dlt -from dlt.common import pendulum -from dlt.common.utils import uniq_id -from tests.cases import table_update_and_row, assert_all_data_types_row -from tests.pipeline.utils import assert_load_info, load_table_counts - -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.pipeline.utils import load_table_counts from dlt.destinations.exceptions import DatabaseTerminalException diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index e6fa936da8..bae4ed9b59 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -30,12 +30,14 @@ from dlt.extract import DltResource -from tests.load.pipeline.utils import ( +from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, drop_active_pipeline_data, + TABLE_UPDATE, + sequence_generator, + empty_schema, ) -from tests.load.utils import TABLE_UPDATE, sequence_generator, empty_schema # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/conftest.py b/tests/load/conftest.py index fefaeee077..0ad21a6185 100644 --- 
a/tests/load/conftest.py +++ b/tests/load/conftest.py @@ -2,7 +2,7 @@ import pytest from typing import Iterator -from tests.load.utils import ALL_BUCKETS, DEFAULT_BUCKETS, WITH_GDRIVE_BUCKETS +from tests.load.utils import ALL_BUCKETS, DEFAULT_BUCKETS, WITH_GDRIVE_BUCKETS, drop_pipeline from tests.utils import preserve_environ diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index 8f6bf195e2..4e22d62ee6 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -15,7 +15,6 @@ from dlt.destinations.impl.duckdb.exceptions import InvalidInMemoryDuckdbCredentials from dlt.pipeline.exceptions import PipelineStepFailed -from tests.load.pipeline.utils import drop_pipeline from tests.pipeline.utils import assert_table from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, TEST_STORAGE_ROOT diff --git a/tests/load/pipeline/conftest.py b/tests/load/pipeline/conftest.py index 34227a8041..dd57af09f1 100644 --- a/tests/load/pipeline/conftest.py +++ b/tests/load/pipeline/conftest.py @@ -5,4 +5,4 @@ duckdb_pipeline_location, ) from tests.pipeline.utils import drop_dataset_from_env -from tests.load.pipeline.utils import drop_pipeline +from tests.load.utils import drop_pipeline diff --git a/tests/load/pipeline/test_athena.py b/tests/load/pipeline/test_athena.py index 4234b55fc1..921d8a083e 100644 --- a/tests/load/pipeline/test_athena.py +++ b/tests/load/pipeline/test_athena.py @@ -10,14 +10,14 @@ from tests.pipeline.utils import load_table_counts from dlt.destinations.exceptions import CantExtractTablePrefix from dlt.destinations.adapters import athena_partition, athena_adapter -from dlt.destinations.fs_client import FSClientBase -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration from tests.load.utils import ( TEST_FILE_LAYOUTS, FILE_LAYOUT_MANY_TABLES_ONE_FOLDER, FILE_LAYOUT_CLASSIC, FILE_LAYOUT_TABLE_NOT_FIRST, + destinations_configs, + DestinationTestConfiguration, ) # mark all tests as essential, do not remove diff --git a/tests/load/pipeline/test_bigquery.py b/tests/load/pipeline/test_bigquery.py index 68533a5d43..0618ff9d3d 100644 --- a/tests/load/pipeline/test_bigquery.py +++ b/tests/load/pipeline/test_bigquery.py @@ -3,8 +3,7 @@ from dlt.common import Decimal from tests.pipeline.utils import assert_load_info -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -from tests.load.utils import delete_dataset +from tests.load.utils import destinations_configs, DestinationTestConfiguration # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/pipeline/test_clickhouse.py b/tests/load/pipeline/test_clickhouse.py index 2ba5cfdcb8..b4ccfe3581 100644 --- a/tests/load/pipeline/test_clickhouse.py +++ b/tests/load/pipeline/test_clickhouse.py @@ -5,10 +5,7 @@ import dlt from dlt.common.typing import TDataItem from dlt.common.utils import uniq_id -from tests.load.pipeline.utils import ( - destinations_configs, - DestinationTestConfiguration, -) +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.pipeline.utils import load_table_counts diff --git a/tests/load/pipeline/test_csv_loading.py b/tests/load/pipeline/test_csv_loading.py index c8bdfea3cb..b1054ae798 100644 --- a/tests/load/pipeline/test_csv_loading.py +++ b/tests/load/pipeline/test_csv_loading.py @@ -8,7 +8,6 @@ from dlt.common.typing import TLoaderFileFormat from dlt.common.utils 
import uniq_id -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration from tests.cases import arrow_table_all_data_types, prepare_shuffled_tables from tests.pipeline.utils import ( assert_data_table_counts, @@ -16,6 +15,7 @@ assert_only_table_columns, load_tables_to_dicts, ) +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.utils import TestDataItemFormat @@ -104,7 +104,8 @@ def test_custom_csv_no_header( assert ".csv" in job_extensions # we allow state to be saved to make sure it is not in csv format (which would broke) # the loading. state is always saved in destination preferred format - assert pipeline.destination.capabilities().preferred_loader_file_format in job_extensions + preferred_ext = "." + pipeline.destination.capabilities().preferred_loader_file_format + assert preferred_ext in job_extensions @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_dbt_helper.py b/tests/load/pipeline/test_dbt_helper.py index 1dc225594f..86ee1a646e 100644 --- a/tests/load/pipeline/test_dbt_helper.py +++ b/tests/load/pipeline/test_dbt_helper.py @@ -11,8 +11,8 @@ from dlt.helpers.dbt.exceptions import DBTProcessingError, PrerequisitesException from tests.pipeline.utils import select_data +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.utils import ACTIVE_SQL_DESTINATIONS -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration # uncomment add motherduck tests # NOTE: the tests are passing but we disable them due to frequent ATTACH DATABASE timeouts diff --git a/tests/load/pipeline/test_drop.py b/tests/load/pipeline/test_drop.py index acc3bcc68c..ea279fd11d 100644 --- a/tests/load/pipeline/test_drop.py +++ b/tests/load/pipeline/test_drop.py @@ -17,7 +17,7 @@ ) from dlt.destinations.job_client_impl import SqlJobClientBase -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.load.utils import destinations_configs, DestinationTestConfiguration def _attach(pipeline: Pipeline) -> Pipeline: diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index a32ec82aa4..80c6b861ee 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ -7,11 +7,8 @@ from dlt.pipeline.exceptions import PipelineStepFailed from tests.cases import TABLE_UPDATE_ALL_INT_PRECISIONS, TABLE_UPDATE_ALL_TIMESTAMP_PRECISIONS +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.pipeline.utils import airtable_emojis, load_table_counts -from tests.load.pipeline.utils import ( - destinations_configs, - DestinationTestConfiguration, -) @pytest.mark.parametrize( diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index efbdc082f1..19132d2a8e 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -20,7 +20,7 @@ from tests.common.utils import load_json_case from tests.utils import ALL_TEST_DATA_ITEM_FORMATS, TestDataItemFormat, skip_if_not_active from dlt.destinations.path_utils import create_path -from tests.load.pipeline.utils import ( +from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, ) diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index 732f0258c8..ffebb21797 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ 
b/tests/load/pipeline/test_merge_disposition.py @@ -18,9 +18,12 @@ from dlt.sources.helpers.transform import skip_first, take_first from dlt.pipeline.exceptions import PipelineStepFailed -from tests.load.utils import normalize_storage_table_cols from tests.pipeline.utils import assert_load_info, load_table_counts, select_data -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.load.utils import ( + normalize_storage_table_cols, + destinations_configs, + DestinationTestConfiguration, +) # uncomment add motherduck tests # NOTE: the tests are passing but we disable them due to frequent ATTACH DATABASE timeouts diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index 8c4b8cec29..a12c29168f 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -27,7 +27,7 @@ PipelineStepFailed, ) -from tests.utils import TEST_STORAGE_ROOT, data_to_item_format, preserve_environ +from tests.utils import TEST_STORAGE_ROOT, data_to_item_format from tests.pipeline.utils import ( assert_data_table_counts, assert_load_info, @@ -41,12 +41,11 @@ TABLE_UPDATE_COLUMNS_SCHEMA, assert_all_data_types_row, delete_dataset, -) -from tests.load.pipeline.utils import ( drop_active_pipeline_data, - REPLACE_STRATEGIES, + destinations_configs, + DestinationTestConfiguration, ) -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.load.pipeline.utils import REPLACE_STRATEGIES # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/pipeline/test_postgres.py b/tests/load/pipeline/test_postgres.py index 045aadfe98..81b729eefa 100644 --- a/tests/load/pipeline/test_postgres.py +++ b/tests/load/pipeline/test_postgres.py @@ -6,7 +6,7 @@ from dlt.common.utils import uniq_id -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.pipeline.utils import assert_load_info, load_tables_to_dicts from tests.utils import TestDataItemFormat diff --git a/tests/load/pipeline/test_redshift.py b/tests/load/pipeline/test_redshift.py index 29293693f5..bfdc15459c 100644 --- a/tests/load/pipeline/test_redshift.py +++ b/tests/load/pipeline/test_redshift.py @@ -4,7 +4,7 @@ import dlt from dlt.common.utils import uniq_id -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration +from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.cases import table_update_and_row, assert_all_data_types_row from tests.pipeline.utils import assert_load_info diff --git a/tests/load/pipeline/test_replace_disposition.py b/tests/load/pipeline/test_replace_disposition.py index 464b5aea1f..12bc69abe0 100644 --- a/tests/load/pipeline/test_replace_disposition.py +++ b/tests/load/pipeline/test_replace_disposition.py @@ -4,12 +4,12 @@ from dlt.common.utils import uniq_id from tests.pipeline.utils import assert_load_info, load_table_counts, load_tables_to_dicts -from tests.load.pipeline.utils import ( +from tests.load.utils import ( drop_active_pipeline_data, destinations_configs, DestinationTestConfiguration, - REPLACE_STRATEGIES, ) +from tests.load.pipeline.utils import REPLACE_STRATEGIES @pytest.mark.essential diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index 6ddc43bab2..b8770f6deb 100644 --- 
a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -25,12 +25,12 @@ from tests.cases import JSON_TYPED_DICT, JSON_TYPED_DICT_DECODED from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V9, yml_case_path as common_yml_case_path from tests.common.configuration.utils import environment -from tests.load.pipeline.utils import drop_active_pipeline_data from tests.pipeline.utils import assert_query_data from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, get_normalized_dataset_name, + drop_active_pipeline_data, ) diff --git a/tests/load/pipeline/test_scd2.py b/tests/load/pipeline/test_scd2.py index e8baa33ff3..4cbe47e960 100644 --- a/tests/load/pipeline/test_scd2.py +++ b/tests/load/pipeline/test_scd2.py @@ -17,12 +17,11 @@ from dlt.pipeline.exceptions import PipelineStepFailed from tests.cases import arrow_table_all_data_types -from tests.pipeline.utils import assert_load_info, load_table_counts -from tests.load.pipeline.utils import ( +from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, ) -from tests.pipeline.utils import load_tables_to_dicts +from tests.pipeline.utils import load_tables_to_dicts, assert_load_info, load_table_counts from tests.utils import TPythonTableFormat diff --git a/tests/load/pipeline/test_stage_loading.py b/tests/load/pipeline/test_stage_loading.py index 42e67345ac..7f1427f20f 100644 --- a/tests/load/pipeline/test_stage_loading.py +++ b/tests/load/pipeline/test_stage_loading.py @@ -8,15 +8,13 @@ from dlt.common.schema.typing import TDataType from tests.load.pipeline.test_merge_disposition import github -from tests.pipeline.utils import load_table_counts -from tests.pipeline.utils import assert_load_info +from tests.pipeline.utils import load_table_counts, assert_load_info from tests.load.utils import ( - TABLE_ROW_ALL_DATA_TYPES, - TABLE_UPDATE_COLUMNS_SCHEMA, + destinations_configs, + DestinationTestConfiguration, assert_all_data_types_row, ) from tests.cases import table_update_and_row -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration @dlt.resource( diff --git a/tests/load/pipeline/test_write_disposition_changes.py b/tests/load/pipeline/test_write_disposition_changes.py index b1703b4339..ba2f6bf172 100644 --- a/tests/load/pipeline/test_write_disposition_changes.py +++ b/tests/load/pipeline/test_write_disposition_changes.py @@ -1,7 +1,7 @@ import pytest import dlt from typing import Any -from tests.load.pipeline.utils import ( +from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, ) diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py index dcf115f7e7..679c2d6da9 100644 --- a/tests/load/pipeline/utils.py +++ b/tests/load/pipeline/utils.py @@ -1,69 +1 @@ -from typing import Iterator, TYPE_CHECKING -import pytest - -import dlt -from dlt.common.destination.reference import WithStagingDataset - -from dlt.common.configuration.container import Container -from dlt.common.pipeline import LoadInfo, PipelineContext - -from tests.load.utils import DestinationTestConfiguration, destinations_configs -from dlt.destinations.exceptions import CantExtractTablePrefix - -if TYPE_CHECKING: - from dlt.destinations.impl.filesystem.filesystem import FilesystemClient - REPLACE_STRATEGIES = ["truncate-and-insert", "insert-from-staging", "staging-optimized"] - - -@pytest.fixture(autouse=True) -def drop_pipeline(request) -> Iterator[None]: - yield - if "no_load" in request.keywords: - return - try: - 
drop_active_pipeline_data() - except CantExtractTablePrefix: - # for some tests we test that this exception is raised, - # so we suppress it here - pass - - -def drop_active_pipeline_data() -> None: - """Drops all the datasets for currently active pipeline, wipes the working folder and then deactivated it.""" - if Container()[PipelineContext].is_active(): - try: - # take existing pipeline - p = dlt.pipeline() - - def _drop_dataset(schema_name: str) -> None: - with p.destination_client(schema_name) as client: - try: - client.drop_storage() - print("dropped") - except Exception as exc: - print(exc) - if isinstance(client, WithStagingDataset): - with client.with_staging_dataset(): - try: - client.drop_storage() - print("staging dropped") - except Exception as exc: - print(exc) - - # drop_func = _drop_dataset_fs if _is_filesystem(p) else _drop_dataset_sql - # take all schemas and if destination was set - if p.destination: - if p.config.use_single_dataset: - # drop just the dataset for default schema - if p.default_schema_name: - _drop_dataset(p.default_schema_name) - else: - # for each schema, drop the dataset - for schema_name in p.schema_names: - _drop_dataset(schema_name) - - # p._wipe_working_folder() - finally: - # always deactivate context, working directory will be wiped when the next test starts - Container()[PipelineContext].deactivate() diff --git a/tests/load/synapse/test_synapse_table_indexing.py b/tests/load/synapse/test_synapse_table_indexing.py index b3a077dd7f..d877b769cc 100644 --- a/tests/load/synapse/test_synapse_table_indexing.py +++ b/tests/load/synapse/test_synapse_table_indexing.py @@ -9,9 +9,6 @@ from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES -from tests.load.pipeline.utils import ( - drop_pipeline, -) # this import ensures all test data gets removed from tests.load.synapse.utils import get_storage_table_index_type # mark all tests as essential, do not remove diff --git a/tests/load/test_insert_job_client.py b/tests/load/test_insert_job_client.py index 57c3947cca..38155a8b09 100644 --- a/tests/load/test_insert_job_client.py +++ b/tests/load/test_insert_job_client.py @@ -11,8 +11,12 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient from tests.utils import TEST_STORAGE_ROOT, skipifpypy -from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage -from tests.load.pipeline.utils import destinations_configs +from tests.load.utils import ( + expect_load_file, + prepare_table, + yield_client_with_storage, + destinations_configs, +) DEFAULT_SUBSET = ["duckdb", "redshift", "postgres", "mssql", "synapse", "motherduck"] diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 04a1175180..9360ef274f 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -42,8 +42,9 @@ write_dataset, prepare_table, normalize_storage_table_cols, + destinations_configs, + DestinationTestConfiguration, ) -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration @pytest.fixture diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 7fb7cc26c4..d9049509da 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -22,8 +22,12 @@ from dlt.common.time import ensure_pendulum_datetime from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage -from tests.load.utils import yield_client_with_storage, prepare_table, AWS_BUCKET -from 
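As a usage note on the autouse `drop_pipeline` fixture being relocated here (removed from `tests/load/pipeline/utils.py` above and re-added to `tests/load/utils.py` below): because it inspects `request.keywords` for a `no_load` marker, a test can opt out of the destination cleanup simply by carrying that marker. A minimal sketch, assuming the `no_load` marker is registered in the suite's pytest configuration; the test body and pipeline name are illustrative:

```py
import pytest
import dlt


@pytest.mark.no_load
def test_pipeline_configuration_only() -> None:
    # nothing is loaded to the destination, so the autouse `drop_pipeline`
    # fixture will skip `drop_active_pipeline_data()` for this test
    pipeline = dlt.pipeline(pipeline_name="config_only", destination="duckdb")
    assert pipeline.pipeline_name == "config_only"
```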
tests.load.pipeline.utils import destinations_configs +from tests.load.utils import ( + yield_client_with_storage, + prepare_table, + AWS_BUCKET, + destinations_configs, +) @pytest.fixture diff --git a/tests/load/utils.py b/tests/load/utils.py index d56f36b132..a83066b76b 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -24,6 +24,7 @@ from dlt.common.destination import TLoaderFileFormat, Destination from dlt.common.destination.reference import DEFAULT_FILE_LAYOUT from dlt.common.data_writers import DataWriter +from dlt.common.pipeline import PipelineContext from dlt.common.schema import TTableSchemaColumns, Schema from dlt.common.storages import SchemaStorage, FileStorage, SchemaStorageConfiguration from dlt.common.schema.utils import new_table, normalize_table_identifiers @@ -31,6 +32,7 @@ from dlt.common.typing import StrAny from dlt.common.utils import uniq_id +from dlt.destinations.exceptions import CantExtractTablePrefix from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.job_client_impl import SqlJobClientBase @@ -489,6 +491,59 @@ def destinations_configs( return destination_configs +@pytest.fixture(autouse=True) +def drop_pipeline(request) -> Iterator[None]: + yield + if "no_load" in request.keywords: + return + try: + drop_active_pipeline_data() + except CantExtractTablePrefix: + # for some tests we test that this exception is raised, + # so we suppress it here + pass + + +def drop_active_pipeline_data() -> None: + """Drops all the datasets for currently active pipeline, wipes the working folder and then deactivated it.""" + if Container()[PipelineContext].is_active(): + try: + # take existing pipeline + p = dlt.pipeline() + + def _drop_dataset(schema_name: str) -> None: + with p.destination_client(schema_name) as client: + try: + client.drop_storage() + print("dropped") + except Exception as exc: + print(exc) + if isinstance(client, WithStagingDataset): + with client.with_staging_dataset(): + try: + client.drop_storage() + print("staging dropped") + except Exception as exc: + print(exc) + + # drop_func = _drop_dataset_fs if _is_filesystem(p) else _drop_dataset_sql + # take all schemas and if destination was set + if p.destination: + if p.config.use_single_dataset: + # drop just the dataset for default schema + if p.default_schema_name: + _drop_dataset(p.default_schema_name) + else: + # for each schema, drop the dataset + for schema_name in p.schema_names: + _drop_dataset(schema_name) + + # p._wipe_working_folder() + finally: + # always deactivate context, working directory will be wiped when the next test starts + Container()[PipelineContext].deactivate() + + @pytest.fixture def empty_schema() -> Schema: schema = Schema("event") From abd02df4e4564db71940724b5408218506c5bdbb Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:06:06 +0200 Subject: [PATCH 087/105] drops default naming convention from caps so naming in saved schema persists, allows (section, , schema) config section for schema settings --- dlt/common/destination/capabilities.py | 3 +- dlt/common/normalizers/naming/sql_cs_v1.py | 2 + dlt/common/normalizers/utils.py | 19 +++- dlt/common/schema/schema.py | 4 +- .../normalizers/test_import_normalizers.py | 11 +++ .../schema/test_normalize_identifiers.py | 87 ++++++++++++++++++- 6 files changed, 118 insertions(+), 8 deletions(-) diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index b198d256e7..f28065782a 100644 --- a/dlt/common/destination/capabilities.py +++ 
b/dlt/common/destination/capabilities.py @@ -73,7 +73,8 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): is_max_text_data_type_length_in_bytes: bool = None supports_transactions: bool = None supports_ddl_transactions: bool = None - naming_convention: Union[str, NamingConvention] = "snake_case" + # use naming convention in the schema + naming_convention: Union[str, NamingConvention] = None alter_add_multi_column: bool = True supports_truncate_command: bool = True schema_supports_numeric_precision: bool = True diff --git a/dlt/common/normalizers/naming/sql_cs_v1.py b/dlt/common/normalizers/naming/sql_cs_v1.py index f83e4259a5..93b93bbc89 100644 --- a/dlt/common/normalizers/naming/sql_cs_v1.py +++ b/dlt/common/normalizers/naming/sql_cs_v1.py @@ -2,6 +2,8 @@ from dlt.common.normalizers.naming.naming import NamingConvention as BaseNamingConvention +# TODO: not yet finished + class NamingConvention(BaseNamingConvention): PATH_SEPARATOR = "__" diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index a13b517844..49751980ff 100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -1,9 +1,10 @@ import inspect from importlib import import_module -from typing import Any, Type, Tuple, Union, cast, List +from typing import Any, Dict, Optional, Type, Tuple, Union, cast, List import dlt from dlt.common.configuration.inject import with_config +from dlt.common.configuration.specs import known_sections from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.normalizers.configuration import NormalizersConfiguration from dlt.common.normalizers.json import SupportsDataItemNormalizer, DataItemNormalizer @@ -16,13 +17,25 @@ DLT_ID_LENGTH_BYTES = 10 -@with_config(spec=NormalizersConfiguration) +def _section_for_schema(kwargs: Dict[str, Any]) -> Tuple[str, ...]: + """Uses the schema name to generate dynamic section normalizer settings""" + if schema_name := kwargs.get("schema_name"): + return (known_sections.SOURCES, schema_name) + else: + return (known_sections.SOURCES,) + + +@with_config(spec=NormalizersConfiguration, sections=_section_for_schema) # type: ignore[call-overload] def explicit_normalizers( naming: Union[str, NamingConvention] = dlt.config.value, json_normalizer: TJSONNormalizer = dlt.config.value, allow_identifier_change_on_table_with_data: bool = None, + schema_name: Optional[str] = None, ) -> TNormalizersConfig: - """Gets explicitly configured normalizers - via config or destination caps. May return None as naming or normalizer""" + """Gets explicitly configured normalizers - via config or destination caps. May return None as naming or normalizer + + If `schema_name` is present, a section ("sources", schema_name, "schema") is used to inject the config + """ norm_conf: TNormalizersConfig = {"names": naming, "json": json_normalizer} if allow_identifier_change_on_table_with_data is not None: norm_conf["allow_identifier_change_on_table_with_data"] = ( diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 1ee1013127..fd0521cc14 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -732,7 +732,7 @@ def update_normalizers(self) -> None: Default hints, preferred data types and normalize configs (ie. column propagation) are normalized as well. Regexes are included as long as textual parts can be extracted from an expression. 
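A minimal sketch of how the schema-scoped config section introduced above can pin a naming convention for a single schema while leaving others untouched; the schema name `github` and the environment-variable form of the setting are illustrative, mirroring the new tests:

```py
import os

from dlt.common.normalizers.utils import explicit_normalizers

# configure naming only for the schema named "github" via the
# ("sources", "github", "schema") config section added in this patch
os.environ["SOURCES__GITHUB__SCHEMA__NAMING"] = "direct"

normalizers = explicit_normalizers(schema_name="github")
assert normalizers["names"] == "direct"
# schemas with other names do not pick this value up and keep their defaults
```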
""" - normalizers = explicit_normalizers() + normalizers = explicit_normalizers(schema_name=self._schema_name) # set the current values as defaults normalizers["names"] = normalizers["names"] or self._normalizers_config["names"] normalizers["json"] = normalizers["json"] or self._normalizers_config["json"] @@ -1086,7 +1086,7 @@ def _reset_schema(self, name: str, normalizers: TNormalizersConfig = None) -> No self._add_standard_hints() # configure normalizers, including custom config if present if not normalizers: - normalizers = explicit_normalizers() + normalizers = explicit_normalizers(schema_name=self._schema_name) self._configure_normalizers(normalizers) # add version tables self._add_standard_tables() diff --git a/tests/common/normalizers/test_import_normalizers.py b/tests/common/normalizers/test_import_normalizers.py index 13bd1fcd3c..85bb8ca1cc 100644 --- a/tests/common/normalizers/test_import_normalizers.py +++ b/tests/common/normalizers/test_import_normalizers.py @@ -63,6 +63,17 @@ def test_import_normalizers() -> None: assert json_normalizer is CustomRelationalNormalizer +@pytest.mark.parametrize("sections", ("", "SOURCES__", "SOURCES__TEST_SCHEMA__")) +def test_config_sections(sections: str) -> None: + os.environ[f"{sections}SCHEMA__NAMING"] = "direct" + os.environ[f"{sections}SCHEMA__JSON_NORMALIZER"] = ( + '{"module": "tests.common.normalizers.custom_normalizers"}' + ) + config, _, _ = import_normalizers(explicit_normalizers(schema_name="test_schema")) + assert config["names"] == "direct" + assert config["json"] == {"module": "tests.common.normalizers.custom_normalizers"} + + def test_import_normalizers_with_caps() -> None: # gets the naming convention from capabilities destination_caps = DestinationCapabilitiesContext.generic_capabilities() diff --git a/tests/common/schema/test_normalize_identifiers.py b/tests/common/schema/test_normalize_identifiers.py index 2b7599e65e..b71977a5fd 100644 --- a/tests/common/schema/test_normalize_identifiers.py +++ b/tests/common/schema/test_normalize_identifiers.py @@ -72,18 +72,20 @@ def test_save_load_incomplete_column( def test_schema_config_normalizers(schema: Schema, schema_storage_no_import: SchemaStorage) -> None: # save snake case schema + assert schema._normalizers_config["names"] == "snake_case" schema_storage_no_import.save_schema(schema) # config direct naming convention os.environ["SCHEMA__NAMING"] = "direct" # new schema has direct naming convention schema_direct_nc = Schema("direct_naming") + schema_storage_no_import.save_schema(schema_direct_nc) assert schema_direct_nc._normalizers_config["names"] == "direct" # still after loading the config is "snake" schema = schema_storage_no_import.load_schema(schema.name) assert schema._normalizers_config["names"] == "snake_case" # provide capabilities context destination_caps = DestinationCapabilitiesContext.generic_capabilities() - destination_caps.naming_convention = "snake_case" + destination_caps.naming_convention = "sql_cs_v1" destination_caps.max_identifier_length = 127 with Container().injectable_context(destination_caps): # caps are ignored if schema is configured @@ -91,10 +93,91 @@ def test_schema_config_normalizers(schema: Schema, schema_storage_no_import: Sch assert schema_direct_nc._normalizers_config["names"] == "direct" # but length is there assert schema_direct_nc.naming.max_length == 127 - # also for loaded schema + # when loading schema configuration is ignored schema = schema_storage_no_import.load_schema(schema.name) assert schema._normalizers_config["names"] == 
"snake_case" assert schema.naming.max_length == 127 + # but if we ask to update normalizers config schema is applied + schema.update_normalizers() + assert schema._normalizers_config["names"] == "direct" + + # load schema_direct_nc (direct) + schema_direct_nc = schema_storage_no_import.load_schema(schema_direct_nc.name) + assert schema_direct_nc._normalizers_config["names"] == "direct" + + # drop config + del os.environ["SCHEMA__NAMING"] + schema_direct_nc = schema_storage_no_import.load_schema(schema_direct_nc.name) + assert schema_direct_nc._normalizers_config["names"] == "direct" + + +def test_schema_normalizers_no_config( + schema: Schema, schema_storage_no_import: SchemaStorage +) -> None: + # convert schema to direct and save + os.environ["SCHEMA__NAMING"] = "direct" + schema.update_normalizers() + assert schema._normalizers_config["names"] == "direct" + schema_storage_no_import.save_schema(schema) + # make sure we drop the config correctly + del os.environ["SCHEMA__NAMING"] + schema_test = Schema("test") + assert schema_test.naming.name() == "snake_case" + # use capabilities without default naming convention + destination_caps = DestinationCapabilitiesContext.generic_capabilities() + assert destination_caps.naming_convention is None + destination_caps.max_identifier_length = 66 + with Container().injectable_context(destination_caps): + schema_in_caps = Schema("schema_in_caps") + assert schema_in_caps._normalizers_config["names"] == "snake_case" + assert schema_in_caps.naming.name() == "snake_case" + assert schema_in_caps.naming.max_length == 66 + schema_in_caps.update_normalizers() + assert schema_in_caps.naming.name() == "snake_case" + # old schema preserves convention when loaded + schema = schema_storage_no_import.load_schema(schema.name) + assert schema._normalizers_config["names"] == "direct" + # update normalizer no effect + schema.update_normalizers() + assert schema._normalizers_config["names"] == "direct" + assert schema.naming.max_length == 66 + + # use caps with default naming convention + destination_caps = DestinationCapabilitiesContext.generic_capabilities() + destination_caps.naming_convention = "sql_cs_v1" + destination_caps.max_identifier_length = 127 + with Container().injectable_context(destination_caps): + schema_in_caps = Schema("schema_in_caps") + # new schema gets convention from caps + assert schema_in_caps._normalizers_config["names"] == "sql_cs_v1" + # old schema preserves convention when loaded + schema = schema_storage_no_import.load_schema(schema.name) + assert schema._normalizers_config["names"] == "direct" + # update changes to caps schema + schema.update_normalizers() + assert schema._normalizers_config["names"] == "sql_cs_v1" + assert schema.naming.max_length == 127 + + +@pytest.mark.parametrize("section", ("SOURCES__SCHEMA__NAMING", "SOURCES__THIS__SCHEMA__NAMING")) +def test_config_with_section(section: str) -> None: + os.environ["SOURCES__OTHER__SCHEMA__NAMING"] = "direct" + os.environ[section] = "sql_cs_v1" + this_schema = Schema("this") + that_schema = Schema("that") + assert this_schema.naming.name() == "sql_cs_v1" + expected_that_schema = ( + "snake_case" if section == "SOURCES__THIS__SCHEMA__NAMING" else "sql_cs_v1" + ) + assert that_schema.naming.name() == expected_that_schema + + # test update normalizers + os.environ[section] = "direct" + expected_that_schema = "snake_case" if section == "SOURCES__THIS__SCHEMA__NAMING" else "direct" + this_schema.update_normalizers() + assert this_schema.naming.name() == "direct" + 
that_schema.update_normalizers() + assert that_schema.naming.name() == expected_that_schema def test_normalize_table_identifiers() -> None: From 14b4b0eba155da3fd1b85052d59738265fbc6ad6 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:09:59 +0200 Subject: [PATCH 088/105] unifies all representations of pipeline state --- dlt/common/destination/reference.py | 16 ++- dlt/common/storages/fsspec_filesystem.py | 1 - dlt/common/storages/load_package.py | 4 +- .../impl/filesystem/filesystem.py | 9 +- dlt/destinations/impl/qdrant/qdrant_client.py | 1 - .../impl/weaviate/weaviate_client.py | 1 - dlt/destinations/job_client_impl.py | 10 +- dlt/extract/decorators.py | 11 +- dlt/pipeline/pipeline.py | 4 +- dlt/pipeline/state_sync.py | 21 ++-- tests/pipeline/test_dlt_versions.py | 109 ++++++++++++++---- tests/pipeline/test_pipeline_state.py | 59 +++++++++- 12 files changed, 186 insertions(+), 60 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 776a0f9a6f..90f89b85d7 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -45,7 +45,7 @@ from dlt.common.schema.exceptions import UnknownTableException from dlt.common.storages import FileStorage from dlt.common.storages.load_storage import ParsedLoadJobFileName -from dlt.common.storages.load_package import LoadJobInfo +from dlt.common.storages.load_package import LoadJobInfo, TPipelineStateDoc TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") @@ -64,13 +64,23 @@ class StorageSchemaInfo(NamedTuple): schema: str -class StateInfo(NamedTuple): +@dataclasses.dataclass +class StateInfo: version: int engine_version: int pipeline_name: str state: str created_at: datetime.datetime - dlt_load_id: str + version_hash: Optional[str] = None + _dlt_load_id: Optional[str] = None + + def as_doc(self) -> TPipelineStateDoc: + doc: TPipelineStateDoc = dataclasses.asdict(self) # type: ignore[assignment] + if self._dlt_load_id is None: + doc.pop("_dlt_load_id") + if self.version_hash is None: + doc.pop("version_hash") + return doc @configspec diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index a21f0f2c0c..f419baed03 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -5,7 +5,6 @@ import pathlib import posixpath from io import BytesIO -from gzip import GzipFile from typing import ( Literal, cast, diff --git a/dlt/common/storages/load_package.py b/dlt/common/storages/load_package.py index f29a3f4446..9e3185221d 100644 --- a/dlt/common/storages/load_package.py +++ b/dlt/common/storages/load_package.py @@ -67,9 +67,9 @@ class TPipelineStateDoc(TypedDict, total=False): engine_version: int pipeline_name: str state: str - version_hash: str created_at: datetime.datetime - dlt_load_id: NotRequired[str] + version_hash: str + _dlt_load_id: NotRequired[str] class TLoadPackageState(TVersionedState, total=False): diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index ef438de9a0..6757399e98 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -12,7 +12,7 @@ from dlt.common.typing import DictStrAny from dlt.common.schema import Schema, TSchemaTables, TTableSchema from dlt.common.storages import FileStorage, 
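Since the `StateInfo` data class above becomes the single representation of stored pipeline state, a minimal usage sketch may help (all field values are illustrative). Optional fields that were never set are dropped from the document form produced by `as_doc()`:

```py
from dlt.common.pendulum import pendulum
from dlt.common.destination.reference import StateInfo

info = StateInfo(
    version=1,
    engine_version=4,
    pipeline_name="github_pipeline",
    state="<compressed state blob>",
    created_at=pendulum.now(),
    version_hash="abc",
    _dlt_load_id="1719264000.000001",
)
doc = info.as_doc()
assert doc["_dlt_load_id"] == "1719264000.000001"

# optional fields that are missing do not appear in the doc at all
minimal = StateInfo(1, 4, "github_pipeline", "<compressed state blob>", pendulum.now())
assert "version_hash" not in minimal.as_doc()
assert "_dlt_load_id" not in minimal.as_doc()
```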
fsspec_from_config -from dlt.common.storages.load_package import LoadJobInfo, ParsedLoadJobFileName +from dlt.common.storages.load_package import LoadJobInfo, ParsedLoadJobFileName, TPipelineStateDoc from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( NewLoadJob, @@ -450,8 +450,11 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: # Load compressed state from destination if selected_path: - state_json = json.loads(self.fs_client.read_text(selected_path)) - state_json.pop("version_hash") + state_json: TPipelineStateDoc = json.loads(self.fs_client.read_text(selected_path)) + # we had dlt_load_id stored until version 0.5 and since we do not have any version control + # we always migrate + if load_id := state_json.pop("dlt_load_id", None): # type: ignore[typeddict-item] + state_json["_dlt_load_id"] = load_id return StateInfo(**state_json) return None diff --git a/dlt/destinations/impl/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py index 554602d594..51915c5536 100644 --- a/dlt/destinations/impl/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -379,7 +379,6 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ), ) if load_records.count > 0: - state["dlt_load_id"] = state.pop(p_dlt_load_id) return StateInfo(**state) except Exception: return None diff --git a/dlt/destinations/impl/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py index 8f4bdf4db7..71f2f13e76 100644 --- a/dlt/destinations/impl/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -546,7 +546,6 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: ) # if there is a load for this state which was successful, return the state if len(load_records): - state["dlt_load_id"] = state.pop(p_dlt_load_id) return StateInfo(**state) def get_stored_schema(self) -> Optional[StorageSchemaInfo]: diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 14dfd8b894..c3e9e08406 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -409,7 +409,15 @@ def get_stored_state(self, pipeline_name: str) -> StateInfo: row = cur.fetchone() if not row: return None - return StateInfo(row[0], row[1], row[2], row[3], pendulum.instance(row[4]), row[5]) + # NOTE: we request order of columns in SELECT statement which corresponds to StateInfo + return StateInfo( + version=row[0], + engine_version=row[1], + pipeline_name=row[2], + state=row[3], + created_at=pendulum.instance(row[4]), + _dlt_load_id=row[5], + ) def _norm_and_escape_columns(self, *columns: str) -> Iterator[str]: return map( diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 4a2f2a2807..ad10ef3ad3 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -42,16 +42,13 @@ TSchemaContract, TTableFormat, ) -from dlt.extract.hints import make_hints -from dlt.extract.utils import ( - simulate_func_call, - wrap_compat_transformer, - wrap_resource_gen, -) from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.typing import AnyFun, ParamSpec, Concatenate, TDataItem, TDataItems from dlt.common.utils import get_callable_name, get_module_name, is_inner_callable + +from dlt.extract.hints import make_hints +from dlt.extract.utils import simulate_func_call from dlt.extract.exceptions import 
( CurrentSourceNotAvailable, DynamicNameNotStandaloneResource, @@ -65,8 +62,6 @@ SourceNotAFunction, CurrentSourceSchemaNotAvailable, ) -from dlt.extract.incremental import IncrementalResourceWrapper - from dlt.extract.items import TTableHintTemplate from dlt.extract.source import DltSource from dlt.extract.resource import DltResource, TUnboundDltResource, TDltResourceImpl diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 72db857aa8..333038f92f 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -1622,7 +1622,7 @@ def _bump_version_and_extract_state( extract: Extract = None, load_package_state_update: Optional[Dict[str, Any]] = None, schema: Optional[Schema] = None, - ) -> TPipelineStateDoc: + ) -> None: """Merges existing state into `state` and extracts state using `storage` if extract_state is True. Storage will be created on demand. In that case the extracted package will be immediately committed. @@ -1657,8 +1657,6 @@ def _bump_version_and_extract_state( # commit only if we created storage if not extract: extract_.commit_packages() - return doc - return None def _list_schemas_sorted(self) -> List[str]: """Lists schema names sorted to have deterministic state""" diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index 26ff6ddb54..11648328f2 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -85,17 +85,16 @@ def state_doc(state: TPipelineState, load_id: str = None) -> TPipelineStateDoc: state = copy(state) state.pop("_local") state_str = compress_state(state) - doc: TPipelineStateDoc = { - "version": state["_state_version"], - "engine_version": state["_state_engine_version"], - "pipeline_name": state["pipeline_name"], - "state": state_str, - "created_at": pendulum.now(), - "version_hash": state["_version_hash"], - } - if load_id: - doc["dlt_load_id"] = load_id - return doc + info = StateInfo( + version=state["_state_version"], + engine_version=state["_state_engine_version"], + pipeline_name=state["pipeline_name"], + state=state_str, + created_at=pendulum.now(), + version_hash=state["_version_hash"], + _dlt_load_id=load_id, + ) + return info.as_doc() def state_resource(state: TPipelineState, load_id: str) -> Tuple[DltResource, TPipelineStateDoc]: diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index e4eb9e23a4..be086f1694 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -1,5 +1,4 @@ from subprocess import CalledProcessError -import sys import pytest import tempfile import shutil @@ -20,14 +19,15 @@ TStoredSchema, ) from dlt.common.configuration.resolve import resolve_configuration -from dlt.destinations import duckdb +from dlt.destinations import duckdb, filesystem from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient +from tests.pipeline.utils import load_table_counts from tests.utils import TEST_STORAGE_ROOT, test_storage -if sys.version_info >= (3, 12): - pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) +# if sys.version_info >= (3, 12): +# pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) GITHUB_PIPELINE_NAME = "dlt_github_pipeline" @@ -52,7 +52,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: # load 20 issues print( venv.run_script( - "../tests/pipeline/cases/github_pipeline/github_pipeline.py", "20" + 
"../tests/pipeline/cases/github_pipeline/github_pipeline.py", + "duckdb", + "20", ) ) # load schema and check _dlt_loads definition @@ -107,7 +109,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: try: print( venv.run_script( - "../tests/pipeline/cases/github_pipeline/github_pipeline.py" + "../tests/pipeline/cases/github_pipeline/github_pipeline.py", "duckdb" ) ) except CalledProcessError as cpe: @@ -160,23 +162,82 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: assert rows[1][7] == state_dict["_version_hash"] # attach to existing pipeline - pipeline = dlt.attach(GITHUB_PIPELINE_NAME, credentials=duckdb_cfg.credentials) - created_at_value = pipeline.state["sources"]["github"]["resources"]["load_issues"][ - "incremental" - ]["created_at"]["last_value"] - assert isinstance(created_at_value, pendulum.DateTime) - assert created_at_value == pendulum.parse("2023-02-17T09:52:12Z") - pipeline = pipeline.drop() - # print(pipeline.working_dir) - assert pipeline.dataset_name == GITHUB_DATASET - assert pipeline.default_schema_name is None - # sync from destination - pipeline.sync_destination() - # print(pipeline.working_dir) - # we have updated schema - assert pipeline.default_schema.ENGINE_VERSION == 9 - # make sure that schema hash retrieved from the destination is exactly the same as the schema hash that was in storage before the schema was wiped - assert pipeline.default_schema.stored_version_hash == github_schema["version_hash"] + pipeline = dlt.attach( + GITHUB_PIPELINE_NAME, destination=duckdb(credentials=duckdb_cfg.credentials) + ) + assert_github_pipeline_end_state(pipeline, github_schema, 2) + + +def test_filesystem_pipeline_with_dlt_update(test_storage: FileStorage) -> None: + shutil.copytree("tests/pipeline/cases/github_pipeline", TEST_STORAGE_ROOT, dirs_exist_ok=True) + + # execute in test storage + with set_working_dir(TEST_STORAGE_ROOT): + # store dlt data in test storage (like patch_home_dir) + with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): + # create virtual env with (0.4.9) where filesystem started to store state + with Venv.create(tempfile.mkdtemp(), ["dlt==0.4.9"]) as venv: + try: + print(venv.run_script("github_pipeline.py", "filesystem", "20")) + except CalledProcessError as cpe: + print(f"script stdout: {cpe.stdout}") + print(f"script stderr: {cpe.stderr}") + raise + # load all issues + venv = Venv.restore_current() + try: + print(venv.run_script("github_pipeline.py", "filesystem")) + except CalledProcessError as cpe: + print(f"script stdout: {cpe.stdout}") + print(f"script stderr: {cpe.stderr}") + raise + # hash hash in schema + github_schema = json.loads( + test_storage.load( + f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" + ) + ) + # attach to existing pipeline + pipeline = dlt.attach(GITHUB_PIPELINE_NAME, destination=filesystem("_storage/data")) + # assert end state + assert_github_pipeline_end_state(pipeline, github_schema, 2) + # load new state + fs_client = pipeline._fs_client() + state_files = sorted(fs_client.list_table_files("_dlt_pipeline_state")) + # first file is in old format + state_1 = json.loads(fs_client.read_text(state_files[0])) + assert "dlt_load_id" in state_1 + # seconds is new + state_2 = json.loads(fs_client.read_text(state_files[1])) + assert "_dlt_load_id" in state_2 + + +def assert_github_pipeline_end_state( + pipeline: dlt.Pipeline, orig_schema: TStoredSchema, schema_updates: int +) -> None: + # get tables counts + table_counts = load_table_counts(pipeline, 
*pipeline.default_schema.data_table_names()) + assert table_counts == {"issues": 100, "issues__assignees": 31, "issues__labels": 34} + dlt_counts = load_table_counts(pipeline, *pipeline.default_schema.dlt_table_names()) + assert dlt_counts == {"_dlt_version": schema_updates, "_dlt_loads": 2, "_dlt_pipeline_state": 2} + + # check state + created_at_value = pipeline.state["sources"]["github"]["resources"]["load_issues"][ + "incremental" + ]["created_at"]["last_value"] + assert isinstance(created_at_value, pendulum.DateTime) + assert created_at_value == pendulum.parse("2023-02-17T09:52:12Z") + pipeline = pipeline.drop() + # print(pipeline.working_dir) + assert pipeline.dataset_name == GITHUB_DATASET + assert pipeline.default_schema_name is None + # sync from destination + pipeline.sync_destination() + # print(pipeline.working_dir) + # we have updated schema + assert pipeline.default_schema.ENGINE_VERSION == 9 + # make sure that schema hash retrieved from the destination is exactly the same as the schema hash that was in storage before the schema was wiped + assert pipeline.default_schema.stored_version_hash == orig_schema["version_hash"] def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: @@ -201,7 +262,7 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: ) print( venv.run_script( - "../tests/pipeline/cases/github_pipeline/github_normalize.py", + "../tests/pipeline/cases/github_pipeline/github_normalize.py" ) ) # switch to current version and make sure the load package loads and schema migrates diff --git a/tests/pipeline/test_pipeline_state.py b/tests/pipeline/test_pipeline_state.py index 8cbc1ca516..11c45d72cc 100644 --- a/tests/pipeline/test_pipeline_state.py +++ b/tests/pipeline/test_pipeline_state.py @@ -1,20 +1,25 @@ import os import shutil +from typing_extensions import get_type_hints import pytest import dlt - +from dlt.common.pendulum import pendulum from dlt.common.exceptions import ( PipelineStateNotAvailable, ResourceNameNotAvailable, ) from dlt.common.schema import Schema +from dlt.common.schema.utils import pipeline_state_table from dlt.common.source import get_current_pipe_name from dlt.common.storages import FileStorage from dlt.common import pipeline as state_module +from dlt.common.storages.load_package import TPipelineStateDoc from dlt.common.utils import uniq_id -from dlt.common.destination.reference import Destination +from dlt.common.destination.reference import Destination, StateInfo +from dlt.common.validation import validate_dict +from dlt.destinations.utils import get_pipeline_state_query_columns from dlt.pipeline.exceptions import PipelineStateEngineNoUpgradePathException, PipelineStepFailed from dlt.pipeline.pipeline import Pipeline from dlt.pipeline.state_sync import ( @@ -41,6 +46,56 @@ def some_data_resource_state(): dlt.current.resource_state()["last_value"] = last_value + 1 +def test_state_repr() -> None: + """Verify that all possible state representations match""" + table = pipeline_state_table() + state_doc_hints = get_type_hints(TPipelineStateDoc) + sync_class_hints = get_type_hints(StateInfo) + info = StateInfo(1, 4, "pipeline", "compressed", pendulum.now(), "hash", "_load_id") + state_doc = info.as_doc() + # just in case hardcode column order + reference_cols = [ + "version", + "engine_version", + "pipeline_name", + "state", + "created_at", + "version_hash", + "_dlt_load_id", + ] + # doc and table must be in the same order with the same name + assert ( + len(table["columns"]) + == len(state_doc_hints) + == 
len(sync_class_hints) + == len(state_doc) + == len(reference_cols) + ) + for col, hint, class_hint, val, ref_col in zip( + table["columns"].values(), state_doc_hints, sync_class_hints, state_doc, reference_cols + ): + assert col["name"] == hint == class_hint == val == ref_col + + # validate info + validate_dict(TPipelineStateDoc, state_doc, "$") + + info = StateInfo(1, 4, "pipeline", "compressed", pendulum.now()) + state_doc = info.as_doc() + assert "_dlt_load_id" not in state_doc + assert "version_hash" not in state_doc + + # we drop hash in query + compat_table = get_pipeline_state_query_columns() + assert list(compat_table["columns"].keys()) == [ + "version", + "engine_version", + "pipeline_name", + "state", + "created_at", + "_dlt_load_id", + ] + + def test_restore_state_props() -> None: p = dlt.pipeline( pipeline_name="restore_state_props", From 60e45b1bc229b211afb190612b748008384d1bf5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:11:06 +0200 Subject: [PATCH 089/105] tries to decompress text file first in fs_client --- dlt/destinations/fs_client.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/dlt/destinations/fs_client.py b/dlt/destinations/fs_client.py index 5153659614..22b055b5dc 100644 --- a/dlt/destinations/fs_client.py +++ b/dlt/destinations/fs_client.py @@ -1,3 +1,4 @@ +import gzip from typing import Iterable, cast, Any, List from abc import ABC, abstractmethod from fsspec import AbstractFileSystem @@ -41,7 +42,16 @@ def read_text( encoding: Any = None, errors: Any = None, newline: Any = None, + compression: str = None, **kwargs: Any ) -> str: - """reads given file into string""" - return cast(str, self.fs_client.read_text(path, encoding, errors, newline, **kwargs)) + """reads given file into string, tries gzip and pure text""" + if compression is None: + try: + return self.read_text(path, encoding, errors, newline, "gzip", **kwargs) + except (gzip.BadGzipFile, OSError): + pass + with self.fs_client.open( + path, mode="rt", compression=compression, encoding=encoding, newline=newline + ) as f: + return cast(str, f.read()) From a84be2a80d29722539636b64c0aaef5984675270 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:12:40 +0200 Subject: [PATCH 090/105] tests get stored state in test_job_client --- tests/load/test_job_client.py | 57 ++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 9360ef274f..35b988d46e 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -15,7 +15,7 @@ TWriteDisposition, TTableSchema, ) -from dlt.common.schema.utils import new_table, new_column +from dlt.common.schema.utils import new_table, new_column, pipeline_state_table from dlt.common.storages import FileStorage from dlt.common.schema import TTableSchemaColumns from dlt.common.utils import uniq_id @@ -26,10 +26,10 @@ ) from dlt.destinations.job_client_impl import SqlJobClientBase -from dlt.common.destination.reference import WithStagingDataset +from dlt.common.destination.reference import StateInfo, WithStagingDataset from tests.cases import table_update_and_row, assert_all_data_types_row -from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, preserve_environ +from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage from tests.common.utils import load_json_case from tests.load.utils import ( TABLE_UPDATE, @@ -46,6 +46,9 @@ DestinationTestConfiguration, ) +# mark all 
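A rough standalone equivalent of the fallback added to `read_text` above, shown in isolation to make the control flow explicit; this is a sketch, not the actual implementation, and the helper name is made up:

```py
import gzip
from typing import cast

from fsspec import AbstractFileSystem


def read_possibly_gzipped_text(fs: AbstractFileSystem, path: str) -> str:
    """Try gzip first, fall back to plain text when the file is not compressed."""
    try:
        with fs.open(path, mode="rt", compression="gzip") as f:
            return cast(str, f.read())
    except (gzip.BadGzipFile, OSError):
        with fs.open(path, mode="rt") as f:
            return cast(str, f.read())
```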
tests as essential, do not remove +pytestmark = pytest.mark.essential + @pytest.fixture def file_storage() -> FileStorage: @@ -164,7 +167,6 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None: assert this_schema == newest_schema -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) @@ -725,6 +727,53 @@ def test_default_schema_name_init_storage(destination_config: DestinationTestCon assert client.sql_client.has_dataset() +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +@pytest.mark.parametrize( + "naming_convention", + [ + "tests.common.cases.normalizers.title_case", + "snake_case", + ], +) +def test_get_stored_state( + destination_config: DestinationTestConfiguration, + naming_convention: str, + file_storage: FileStorage, +) -> None: + os.environ["SCHEMA__NAMING"] = naming_convention + + with cm_yield_client_with_storage( + destination_config.destination, default_config_values={"default_schema_name": None} + ) as client: + # event schema with event table + if not client.capabilities.preferred_loader_file_format: + pytest.skip( + "preferred loader file format not set, destination will only work with staging" + ) + # load pipeline state + state_table = pipeline_state_table() + partial = client.schema.update_table(state_table) + print(partial) + client.schema._bump_version() + client.update_stored_schema() + + state_info = StateInfo(1, 4, "pipeline", "compressed", pendulum.now(), None, "_load_id") + doc = state_info.as_doc() + norm_doc = {client.schema.naming.normalize_identifier(k): v for k, v in doc.items()} + with io.BytesIO() as f: + # use normalized columns + write_dataset(client, f, [norm_doc], partial["columns"]) + query = f.getvalue().decode() + expect_load_file(client, file_storage, query, partial["name"]) + client.complete_load("_load_id") + + # get state + stored_state = client.get_stored_state("pipeline") + assert doc == stored_state.as_doc() + + @pytest.mark.parametrize( "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name ) From 1dc7a097bce1be90b4ab066b28139f6888244add Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:13:21 +0200 Subject: [PATCH 091/105] removes credentials from dlt.attach, addes destination and staging factories --- dlt/pipeline/__init__.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 20ba0b07d0..56e3682810 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -175,27 +175,38 @@ def attach( pipeline_salt: TSecretValue = None, full_refresh: Optional[bool] = None, dev_mode: bool = False, - credentials: Any = None, + destination: TDestinationReferenceArg = None, + staging: TDestinationReferenceArg = None, progress: TCollectorArg = _NULL_COLLECTOR, **injection_kwargs: Any, ) -> Pipeline: - """Attaches to the working folder of `pipeline_name` in `pipelines_dir` or in default directory. Requires that valid pipeline state exists in working folder.""" + """Attaches to the working folder of `pipeline_name` in `pipelines_dir` or in default directory. Requires that valid pipeline state exists in working folder. + Pre-configured `destination` and `staging` factories may be provided. If not present, default factories are created from pipeline state. 
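In line with the updated `attach` docstring above, a short usage sketch of attaching with an explicit destination factory instead of raw credentials; the pipeline name and database path are illustrative:

```py
import dlt
from dlt.destinations import duckdb

# attach to an existing working folder and point it at a concrete duckdb database;
# omitting `destination`/`staging` lets attach recreate factories from pipeline state
pipeline = dlt.attach(
    "github_pipeline",
    destination=duckdb(credentials="_storage/github_pipeline.duckdb"),
)
print(pipeline.dataset_name)
```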
+ """ ensure_correct_pipeline_kwargs(attach, **injection_kwargs) full_refresh_argument_deprecated("attach", full_refresh) # if working_dir not provided use temp folder if not pipelines_dir: pipelines_dir = get_dlt_pipelines_dir() progress = collector_from_name(progress) + destination = Destination.from_reference( + destination or injection_kwargs["destination_type"], + destination_name=injection_kwargs["destination_name"], + ) + staging = Destination.from_reference( + staging or injection_kwargs.get("staging_type", None), + destination_name=injection_kwargs.get("staging_name", None), + ) # create new pipeline instance p = Pipeline( pipeline_name, pipelines_dir, pipeline_salt, + destination, + staging, None, None, None, - credentials, - None, None, full_refresh if full_refresh is not None else dev_mode, progress, From ab69b76faf58890c5cac02d633bf2e1f59903033 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 24 Jun 2024 23:13:50 +0200 Subject: [PATCH 092/105] cleans up env variables and pipeline dropping fixutere precedence --- .../dlt-ecosystem/destinations/postgres.md | 2 +- .../dlt-ecosystem/destinations/snowflake.md | 2 +- docs/website/docs/general-usage/resource.md | 4 +-- tests/load/bigquery/test_bigquery_client.py | 2 +- tests/load/conftest.py | 2 +- .../test_databricks_configuration.py | 1 - tests/load/duckdb/test_duckdb_client.py | 4 +-- tests/load/duckdb/test_motherduck_client.py | 2 +- tests/load/filesystem/test_aws_credentials.py | 2 +- .../load/filesystem/test_azure_credentials.py | 2 +- .../load/filesystem/test_filesystem_common.py | 2 +- tests/load/pipeline/conftest.py | 8 +---- tests/load/pipeline/test_arrow_loading.py | 4 +-- tests/load/pipeline/test_drop.py | 2 +- .../load/pipeline/test_filesystem_pipeline.py | 4 +-- tests/load/pipeline/test_refresh_modes.py | 2 +- tests/load/pipeline/test_restore_state.py | 12 ++++--- tests/load/postgres/test_postgres_client.py | 2 +- tests/load/test_dummy_client.py | 1 - tests/load/test_sql_client.py | 8 ++--- tests/load/utils.py | 35 +++++++++++++++---- .../cases/github_pipeline/github_pipeline.py | 16 +++++++-- tests/pipeline/test_arrow_sources.py | 1 - tests/utils.py | 2 +- 24 files changed, 72 insertions(+), 50 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md index 4c72f040d0..49b3c06208 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md +++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md @@ -113,7 +113,7 @@ delimiter="|" include_header=false ``` or -```python +```py from dlt.destinations import postgres from dlt.common.data_writers.configuration import CsvFormatConfiguration diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md index 5a12c6a8ba..7797298bdc 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md +++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md @@ -279,7 +279,7 @@ include_header=false on_error_continue=true ``` or -```python +```py from dlt.destinations import snowflake from dlt.common.data_writers.configuration import CsvFormatConfiguration diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 1439c7b5b3..e09e7294de 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -491,7 +491,7 @@ You can emit columns as Pydantic model and use dynamic hints (ie. 
lambda for tab ### Import external files You can import external files ie. `csv`, `parquet` and `jsonl` by yielding items marked with `with_file_import`, optionally passing table schema corresponding the the imported file. `dlt` will not read, parse and normalize any names (ie. `csv` or `arrow` headers) and will attempt to copy the file into the destination as is. -```python +```py import os import dlt @@ -595,7 +595,7 @@ pipeline.run([generate_rows(10), generate_rows(20)]) ### Pick loader file format for a particular resource You can request a particular loader file format to be used for a resource. -```python +```py @dlt.resource(file_format="parquet") def generate_rows(nr): for i in range(nr): diff --git a/tests/load/bigquery/test_bigquery_client.py b/tests/load/bigquery/test_bigquery_client.py index b16790b07d..e8b5dab8fd 100644 --- a/tests/load/bigquery/test_bigquery_client.py +++ b/tests/load/bigquery/test_bigquery_client.py @@ -22,7 +22,7 @@ from dlt.destinations.impl.bigquery.bigquery import BigQueryClient, BigQueryClientConfiguration from dlt.destinations.exceptions import LoadJobNotExistsException, LoadJobTerminalException -from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, preserve_environ +from tests.utils import TEST_STORAGE_ROOT, delete_test_storage from tests.common.utils import json_case_path as common_json_case_path from tests.common.configuration.utils import environment from tests.load.utils import ( diff --git a/tests/load/conftest.py b/tests/load/conftest.py index 0ad21a6185..a110b1198f 100644 --- a/tests/load/conftest.py +++ b/tests/load/conftest.py @@ -3,7 +3,7 @@ from typing import Iterator from tests.load.utils import ALL_BUCKETS, DEFAULT_BUCKETS, WITH_GDRIVE_BUCKETS, drop_pipeline -from tests.utils import preserve_environ +from tests.utils import preserve_environ, patch_home_dir @pytest.fixture(scope="function", params=DEFAULT_BUCKETS) diff --git a/tests/load/databricks/test_databricks_configuration.py b/tests/load/databricks/test_databricks_configuration.py index cc353f5894..f6a06180c9 100644 --- a/tests/load/databricks/test_databricks_configuration.py +++ b/tests/load/databricks/test_databricks_configuration.py @@ -6,7 +6,6 @@ from dlt.destinations.impl.databricks.configuration import DatabricksClientConfiguration from dlt.common.configuration import resolve_configuration -from tests.utils import preserve_environ # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index 4e22d62ee6..ebbe959874 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -16,7 +16,7 @@ from dlt.destinations.impl.duckdb.exceptions import InvalidInMemoryDuckdbCredentials from dlt.pipeline.exceptions import PipelineStepFailed from tests.pipeline.utils import assert_table -from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, TEST_STORAGE_ROOT +from tests.utils import patch_home_dir, autouse_test_storage, TEST_STORAGE_ROOT # mark all tests as essential, do not remove pytestmark = pytest.mark.essential @@ -56,7 +56,7 @@ def test_duckdb_open_conn_default() -> None: delete_quack_db() -def test_duckdb_in_memory_mode_via_factory(preserve_environ): +def test_duckdb_in_memory_mode_via_factory(): delete_quack_db() try: import duckdb diff --git a/tests/load/duckdb/test_motherduck_client.py b/tests/load/duckdb/test_motherduck_client.py index 2a1d703c87..764e1654c6 100644 --- 
a/tests/load/duckdb/test_motherduck_client.py +++ b/tests/load/duckdb/test_motherduck_client.py @@ -14,7 +14,7 @@ MotherDuckClientConfiguration, ) -from tests.utils import patch_home_dir, preserve_environ, skip_if_not_active +from tests.utils import patch_home_dir, skip_if_not_active # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/filesystem/test_aws_credentials.py b/tests/load/filesystem/test_aws_credentials.py index 28460f1ca3..5e0a3c3fd0 100644 --- a/tests/load/filesystem/test_aws_credentials.py +++ b/tests/load/filesystem/test_aws_credentials.py @@ -9,7 +9,7 @@ from tests.common.configuration.utils import environment from tests.load.utils import ALL_FILESYSTEM_DRIVERS -from tests.utils import preserve_environ, autouse_test_storage +from tests.utils import autouse_test_storage # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/filesystem/test_azure_credentials.py b/tests/load/filesystem/test_azure_credentials.py index 4ee2ec46db..2353491737 100644 --- a/tests/load/filesystem/test_azure_credentials.py +++ b/tests/load/filesystem/test_azure_credentials.py @@ -17,7 +17,7 @@ from dlt.common.storages.configuration import FilesystemConfiguration from tests.load.utils import ALL_FILESYSTEM_DRIVERS, AZ_BUCKET from tests.common.configuration.utils import environment -from tests.utils import preserve_environ, autouse_test_storage +from tests.utils import autouse_test_storage from dlt.common.storages.fsspec_filesystem import fsspec_from_config # mark all tests as essential, do not remove diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index 270e1ff70c..bc609fe9b9 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -22,7 +22,7 @@ ) from tests.common.storages.utils import assert_sample_files from tests.load.utils import ALL_FILESYSTEM_DRIVERS, AWS_BUCKET -from tests.utils import preserve_environ, autouse_test_storage +from tests.utils import autouse_test_storage from .utils import self_signed_cert from tests.common.configuration.utils import environment diff --git a/tests/load/pipeline/conftest.py b/tests/load/pipeline/conftest.py index dd57af09f1..a2ba65494b 100644 --- a/tests/load/pipeline/conftest.py +++ b/tests/load/pipeline/conftest.py @@ -1,8 +1,2 @@ -from tests.utils import ( - patch_home_dir, - preserve_environ, - autouse_test_storage, - duckdb_pipeline_location, -) +from tests.utils import autouse_test_storage, duckdb_pipeline_location from tests.pipeline.utils import drop_dataset_from_env -from tests.load.utils import drop_pipeline diff --git a/tests/load/pipeline/test_arrow_loading.py b/tests/load/pipeline/test_arrow_loading.py index 0bddfaabee..630d84a28c 100644 --- a/tests/load/pipeline/test_arrow_loading.py +++ b/tests/load/pipeline/test_arrow_loading.py @@ -9,14 +9,14 @@ import dlt from dlt.common import pendulum -from dlt.common.time import reduce_pendulum_datetime_precision, ensure_pendulum_datetime +from dlt.common.time import reduce_pendulum_datetime_precision from dlt.common.utils import uniq_id + from tests.load.utils import destinations_configs, DestinationTestConfiguration from tests.pipeline.utils import assert_load_info, select_data from tests.utils import ( TestDataItemFormat, arrow_item_from_pandas, - preserve_environ, TPythonTableFormat, ) from tests.cases import arrow_table_all_data_types diff --git a/tests/load/pipeline/test_drop.py 
b/tests/load/pipeline/test_drop.py index ea279fd11d..2f14b6a4b3 100644 --- a/tests/load/pipeline/test_drop.py +++ b/tests/load/pipeline/test_drop.py @@ -21,7 +21,7 @@ def _attach(pipeline: Pipeline) -> Pipeline: - return dlt.attach(pipeline.pipeline_name, pipeline.pipelines_dir) + return dlt.attach(pipeline.pipeline_name, pipeline.pipelines_dir, dev_mode=pipeline.dev_mode) @dlt.source(section="droppable", name="droppable") diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 19132d2a8e..7067a33764 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -653,8 +653,8 @@ def some_data(): # test accessors for state s1 = c1.get_stored_state("p1") s2 = c1.get_stored_state("p2") - assert s1.dlt_load_id == load_id_1_2 # second load - assert s2.dlt_load_id == load_id_2_1 # first load + assert s1._dlt_load_id == load_id_1_2 # second load + assert s2._dlt_load_id == load_id_2_1 # first load assert s1_old.version != s1.version assert s2_old.version == s2.version diff --git a/tests/load/pipeline/test_refresh_modes.py b/tests/load/pipeline/test_refresh_modes.py index 02ed560068..de557ba118 100644 --- a/tests/load/pipeline/test_refresh_modes.py +++ b/tests/load/pipeline/test_refresh_modes.py @@ -8,7 +8,7 @@ from dlt.common.typing import DictStrAny from dlt.common.pipeline import pipeline_state as current_pipeline_state -from tests.utils import clean_test_storage, preserve_environ +from tests.utils import clean_test_storage from tests.pipeline.utils import ( assert_load_info, load_tables_to_dicts, diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index b8770f6deb..37f999ff86 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -398,7 +398,7 @@ def some_data(): p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) # now attach locally os.environ["RESTORE_FROM_DESTINATION"] = "True" - p = dlt.attach(pipeline_name=pipeline_name) + p = destination_config.attach_pipeline(pipeline_name=pipeline_name) assert p.dataset_name == dataset_name assert p.default_schema_name is None # restore @@ -502,7 +502,7 @@ def test_restore_schemas_while_import_schemas_exist( assert normalized_labels in schema.tables # re-attach the pipeline - p = dlt.attach(pipeline_name=pipeline_name) + p = destination_config.attach_pipeline(pipeline_name=pipeline_name) p.run( ["C", "D", "E"], table_name="annotations", loader_file_format=destination_config.file_format ) @@ -639,7 +639,9 @@ def some_data(param: str) -> Any: prod_state = production_p.state assert p.state["_state_version"] == prod_state["_state_version"] - 1 # re-attach production and sync - ra_production_p = dlt.attach(pipeline_name=pipeline_name, pipelines_dir=TEST_STORAGE_ROOT) + ra_production_p = destination_config.attach_pipeline( + pipeline_name=pipeline_name, pipelines_dir=TEST_STORAGE_ROOT + ) ra_production_p.sync_destination() # state didn't change because production is ahead of local with its version # nevertheless this is potentially dangerous situation 🤷 @@ -712,7 +714,7 @@ def some_data(param: str) -> Any: assert p.dataset_name == dataset_name print("---> no state sync last attach") - p = dlt.attach(pipeline_name=pipeline_name) + p = destination_config.attach_pipeline(pipeline_name=pipeline_name) # this will prevent from creating of _dlt_pipeline_state p.config.restore_from_destination = False data4 = 
some_data("state4") @@ -729,7 +731,7 @@ def some_data(param: str) -> Any: assert p.state["_local"]["first_run"] is False # attach again to make the `run` method check the destination print("---> last attach") - p = dlt.attach(pipeline_name=pipeline_name) + p = destination_config.attach_pipeline(pipeline_name=pipeline_name) p.config.restore_from_destination = True data5 = some_data("state4") data5.apply_hints(table_name="state1_data5") diff --git a/tests/load/postgres/test_postgres_client.py b/tests/load/postgres/test_postgres_client.py index a0fbd85b5b..d8cd996dcf 100644 --- a/tests/load/postgres/test_postgres_client.py +++ b/tests/load/postgres/test_postgres_client.py @@ -11,7 +11,7 @@ from dlt.destinations.impl.postgres.postgres import PostgresClient from dlt.destinations.impl.postgres.sql_client import psycopg2 -from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, skipifpypy, preserve_environ +from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, skipifpypy from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage from tests.common.configuration.utils import environment diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index beb2c88688..be917672f1 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -31,7 +31,6 @@ clean_test_storage, init_test_logging, TEST_DICT_CONFIG_PROVIDER, - preserve_environ, ) from tests.load.utils import prepare_load_package from tests.utils import skip_if_not_active, TEST_STORAGE_ROOT diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index d9049509da..fa31f1db65 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -29,6 +29,9 @@ destinations_configs, ) +# mark all tests as essential, do not remove +pytestmark = pytest.mark.essential + @pytest.fixture def file_storage() -> FileStorage: @@ -145,7 +148,6 @@ def test_malformed_execute_parameters(client: SqlJobClientBase) -> None: assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) @@ -193,7 +195,6 @@ def test_execute_sql(client: SqlJobClientBase) -> None: assert len(rows) == 0 -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) @@ -216,7 +217,6 @@ def test_execute_ddl(client: SqlJobClientBase) -> None: assert rows[0][0] == Decimal("1.0") -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) @@ -259,7 +259,6 @@ def test_execute_query(client: SqlJobClientBase) -> None: assert len(rows) == 0 -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) @@ -311,7 +310,6 @@ def test_execute_df(client: SqlJobClientBase) -> None: assert df_3 is None -@pytest.mark.essential @pytest.mark.parametrize( "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name ) diff --git a/tests/load/utils.py b/tests/load/utils.py index a83066b76b..7420abbc9b 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -128,6 +128,7 @@ class DestinationTestConfiguration: force_iceberg: bool = False supports_dbt: bool = True disable_compression: bool = False + dev_mode: bool = False @property def name(self) -> 
str: @@ -142,15 +143,26 @@ def name(self) -> str: name += f"-{self.extra_info}" return name + @property + def factory_kwargs(self) -> Dict[str, Any]: + return { + k: getattr(self, k) + for k in [ + "bucket_url", + "stage_name", + "staging_iam_role", + "staging_use_msi", + "force_iceberg", + ] + if getattr(self, k, None) is not None + } + def setup(self) -> None: """Sets up environment variables for this destination configuration""" - os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = self.bucket_url or "" - os.environ["DESTINATION__STAGE_NAME"] = self.stage_name or "" - os.environ["DESTINATION__STAGING_IAM_ROLE"] = self.staging_iam_role or "" - os.environ["DESTINATION__STAGING_USE_MSI"] = str(self.staging_use_msi) or "" - os.environ["DESTINATION__FORCE_ICEBERG"] = str(self.force_iceberg) or "" + for k, v in self.factory_kwargs.items(): + os.environ[f"DESTINATION__{k.upper()}"] = str(v) - """For the filesystem destinations we disable compression to make analyzing the result easier""" + # For the filesystem destinations we disable compression to make analyzing the result easier if self.destination == "filesystem" or self.disable_compression: os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" @@ -158,6 +170,7 @@ def setup_pipeline( self, pipeline_name: str, dataset_name: str = None, dev_mode: bool = False, **kwargs ) -> dlt.Pipeline: """Convenience method to setup pipeline with this configuration""" + self.dev_mode = dev_mode self.setup() pipeline = dlt.pipeline( pipeline_name=pipeline_name, @@ -169,6 +182,13 @@ def setup_pipeline( ) return pipeline + def attach_pipeline(self, pipeline_name: str, dev_mode: bool = None, **kwargs) -> dlt.Pipeline: + """Attach to existing pipeline keeping the dev_mode""" + # remember dev_mode from setup_pipeline + dev_mode = dev_mode if dev_mode is not None else self.dev_mode + pipeline = dlt.attach(pipeline_name, dev_mode=dev_mode, **kwargs) + return pipeline + def destinations_configs( default_sql_configs: bool = False, @@ -492,7 +512,8 @@ def destinations_configs( @pytest.fixture(autouse=True) -def drop_pipeline(request) -> Iterator[None]: +def drop_pipeline(request, preserve_environ) -> Iterator[None]: + # NOTE: keep `preserve_environ` to make sure fixtures are executed in order yield if "no_load" in request.keywords: return diff --git a/tests/pipeline/cases/github_pipeline/github_pipeline.py b/tests/pipeline/cases/github_pipeline/github_pipeline.py index aa0f6d0e0e..f4cdc2bcf2 100644 --- a/tests/pipeline/cases/github_pipeline/github_pipeline.py +++ b/tests/pipeline/cases/github_pipeline/github_pipeline.py @@ -33,11 +33,21 @@ def load_issues( if __name__ == "__main__": - p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3") + # pick the destination name + if len(sys.argv) < 2: + raise RuntimeError(f"Please provide destination name in args ({sys.argv})") + dest_ = sys.argv[1] + if dest_ == "filesystem": + import os + from dlt.destinations import filesystem + + dest_ = filesystem(os.path.abspath(os.path.join("_storage", "data"))) # type: ignore + + p = dlt.pipeline("dlt_github_pipeline", destination=dest_, dataset_name="github_3") github_source = github() - if len(sys.argv) > 1: + if len(sys.argv) > 2: # load only N issues - limit = int(sys.argv[1]) + limit = int(sys.argv[2]) github_source.add_limit(limit) info = p.run(github_source) print(info) diff --git a/tests/pipeline/test_arrow_sources.py b/tests/pipeline/test_arrow_sources.py index ae5c0e49a5..ff046e70a7 100644 --- a/tests/pipeline/test_arrow_sources.py +++
b/tests/pipeline/test_arrow_sources.py @@ -19,7 +19,6 @@ ) from tests.pipeline.utils import assert_only_table_columns, load_tables_to_dicts from tests.utils import ( - preserve_environ, TPythonTableFormat, arrow_item_from_pandas, arrow_item_from_table, diff --git a/tests/utils.py b/tests/utils.py index 580c040706..47b6561c8e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -173,7 +173,7 @@ def unload_modules() -> Iterator[None]: @pytest.fixture(autouse=True) -def wipe_pipeline() -> Iterator[None]: +def wipe_pipeline(preserve_environ) -> Iterator[None]: """Wipes pipeline local state and deactivates it""" container = Container() if container[PipelineContext].is_active(): From f1097d8d0759e12df88cf950337b4019b7ffc38e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 25 Jun 2024 00:43:24 +0200 Subject: [PATCH 093/105] removes dev_mode from dlt.attach --- dlt/pipeline/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 56e3682810..4efc7716e6 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -173,8 +173,6 @@ def attach( pipeline_name: str = None, pipelines_dir: str = None, pipeline_salt: TSecretValue = None, - full_refresh: Optional[bool] = None, - dev_mode: bool = False, destination: TDestinationReferenceArg = None, staging: TDestinationReferenceArg = None, progress: TCollectorArg = _NULL_COLLECTOR, @@ -184,7 +182,6 @@ def attach( Pre-configured `destination` and `staging` factories may be provided. If not present, default factories are created from pipeline state. """ ensure_correct_pipeline_kwargs(attach, **injection_kwargs) - full_refresh_argument_deprecated("attach", full_refresh) # if working_dir not provided use temp folder if not pipelines_dir: pipelines_dir = get_dlt_pipelines_dir() @@ -208,7 +205,7 @@ def attach( None, None, None, - full_refresh if full_refresh is not None else dev_mode, + False, # always False as dev_mode so we do not wipe the working folder progress, True, last_config(**injection_kwargs), From 3855fcce351ad60103d9988549e80f73379ffbb0 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 25 Jun 2024 00:44:00 +0200 Subject: [PATCH 094/105] adds missing arguments to filesystem factory --- dlt/destinations/impl/filesystem/factory.py | 14 ++++++++++++++ dlt/destinations/impl/filesystem/typing.py | 4 +++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 111c7e1ad6..1e6eec5cce 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -1,10 +1,12 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext, TLoaderFileFormat +from dlt.common.destination.reference import DEFAULT_FILE_LAYOUT from dlt.common.schema.typing import TTableSchema from dlt.common.storages.configuration import FileSystemCredentials from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration +from dlt.destinations.impl.filesystem.typing import TCurrentDateTime, TExtraPlaceholders if t.TYPE_CHECKING: from dlt.destinations.impl.filesystem.filesystem import FilesystemClient @@ -42,6 +44,9 @@ def __init__( self, bucket_url: str = None, credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any], t.Any] = None, + layout: str = DEFAULT_FILE_LAYOUT, + extra_placeholders: t.Optional[TExtraPlaceholders] = None, + current_datetime: 
t.Optional[TCurrentDateTime] = None, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -63,11 +68,20 @@ def __init__( credentials: Credentials to connect to the filesystem. The type of credentials should correspond to the bucket protocol. For example, for AWS S3, the credentials should be an instance of `AwsCredentials`. A dictionary with the credentials parameters can also be provided. + layout (str): A layout of the files holding table data in the destination bucket/filesystem. Uses a set of pre-defined + and user-defined (extra) placeholders. Please refer to https://dlthub.com/docs/dlt-ecosystem/destinations/filesystem#files-layout + extra_placeholders (dict(str, str | callable)): A dictionary of extra placeholder names that can be used in the `layout` parameter. Names + are mapped to string values or to callables evaluated at runtime. + current_datetime (DateTime | callable): current datetime used by date/time related placeholders. If not provided, load package creation timestamp + will be used. **kwargs: Additional arguments passed to the destination config """ super().__init__( bucket_url=bucket_url, credentials=credentials, + layout=layout, + extra_placeholders=extra_placeholders, + current_datetime=current_datetime, destination_name=destination_name, environment=environment, **kwargs, diff --git a/dlt/destinations/impl/filesystem/typing.py b/dlt/destinations/impl/filesystem/typing.py index 139602198d..6781fe21ac 100644 --- a/dlt/destinations/impl/filesystem/typing.py +++ b/dlt/destinations/impl/filesystem/typing.py @@ -15,5 +15,7 @@ `schema name`, `table name`, `load_id`, `file_id` and an `extension` """ -TExtraPlaceholders: TypeAlias = Dict[str, Union[str, TLayoutPlaceholderCallback]] +TExtraPlaceholders: TypeAlias = Dict[ + str, Union[Union[str, int, DateTime], TLayoutPlaceholderCallback] +] """Extra placeholders for filesystem layout""" From 651412e96e1a1a23dc64fb7c66b984824355d5bf Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 25 Jun 2024 00:44:19 +0200 Subject: [PATCH 095/105] fixes tests --- .../custom_destination_lancedb.py | 4 ++- .../website/docs/general-usage/destination.md | 25 ++++++++++++------- .../load/filesystem/test_filesystem_common.py | 7 +++--- tests/load/pipeline/test_drop.py | 2 +- .../load/pipeline/test_filesystem_pipeline.py | 3 ++- tests/load/utils.py | 5 ++-- tests/pipeline/test_dlt_versions.py | 4 ++- tests/pipeline/test_pipeline.py | 3 ++- tests/pipeline/test_pipeline_extra.py | 5 +++- .../helpers/rest_client/test_client.py | 1 - 10 files changed, 37 insertions(+), 22 deletions(-) diff --git a/docs/examples/custom_destination_lancedb/custom_destination_lancedb.py b/docs/examples/custom_destination_lancedb/custom_destination_lancedb.py index 9d75d90f99..ba815d4fcd 100644 --- a/docs/examples/custom_destination_lancedb/custom_destination_lancedb.py +++ b/docs/examples/custom_destination_lancedb/custom_destination_lancedb.py @@ -38,7 +38,9 @@ from dlt.sources.helpers.rest_client import RESTClient, AuthConfigBase # access secrets to get openai key and instantiate embedding function -openai_api_key: str = dlt.secrets.get("destination.lancedb.credentials.embedding_model_provider_api_key") +openai_api_key: str = dlt.secrets.get( + "destination.lancedb.credentials.embedding_model_provider_api_key" +) func = get_registry().get("openai").create(name="text-embedding-3-small", api_key=openai_api_key) diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md 
index 0cce49390d..842133cfa1 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -18,26 +18,27 @@ We recommend that you declare the destination type when creating a pipeline inst Above we want to use **filesystem** built-in destination. You can use shorthand types only for built-ins. -* Use full **destination class type** +* Use full **destination factory type** -Above we use built in **filesystem** destination by providing a class type `filesystem` from module `dlt.destinations`. You can pass [destinations from external modules](#declare-external-destination) as well. +Above we use built in **filesystem** destination by providing a factory type `filesystem` from module `dlt.destinations`. You can pass [destinations from external modules](#declare-external-destination) as well. -* Import **destination class** +* Import **destination factory** -Above we import destination class for **filesystem** and pass it to the pipeline. +Above we import destination factory for **filesystem** and pass it to the pipeline. -All examples above will create the same destination class with default parameters and pull required config and secret values from [configuration](credentials/configuration.md) - they are equivalent. +All examples above will create the same destination factory with default parameters and pull required config and secret values from [configuration](credentials/configuration.md) - they are equivalent. ### Pass explicit parameters and a name to a destination -You can instantiate **destination class** yourself to configure it explicitly. When doing this you work with destinations the same way you work with [sources](source.md) +You can instantiate **destination factory** yourself to configure it explicitly. When doing this you work with destinations the same way you work with [sources](source.md) -Above we import and instantiate the `filesystem` destination class. We pass explicit url of the bucket and name the destination to `production_az_bucket`. +Above we import and instantiate the `filesystem` destination factory. We pass explicit url of the bucket and name the destination to `production_az_bucket`. + +If destination is not named, its shorthand type (the Python factory name) serves as a destination name. Name your destination explicitly if you need several separate configurations of destinations of the same type (i.e. you wish to maintain credentials for development, staging and production storage buckets in the same config file). Destination name is also stored in the [load info](../running-in-production/running.md#inspect-and-save-the-load-info-and-trace) and pipeline traces so use them also when you need more descriptive names (other than, for example, `filesystem`). -If destination is not named, its shorthand type (the Python class name) serves as a destination name. Name your destination explicitly if you need several separate configurations of destinations of the same type (i.e. you wish to maintain credentials for development, staging and production storage buckets in the same config file). Destination name is also stored in the [load info](../running-in-production/running.md#inspect-and-save-the-load-info-and-trace) and pipeline traces so use them also when you need more descriptive names (other than, for example, `filesystem`). 
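As an illustrative sketch of the explicit instantiation described above (the bucket url and pipeline name below are placeholders; `bucket_url` and `destination_name` follow the `filesystem` factory signature patched earlier in this series):
```py
import dlt
from dlt.destinations import filesystem

# instantiate the factory explicitly: pass the bucket url and name the destination
azure_bucket = filesystem("az://dlt-azure-bucket", destination_name="production_az_bucket")

# use the named destination like any other destination reference
pipeline = dlt.pipeline("az_pipeline", destination=azure_bucket)
```
Configuration for such a named destination is then looked up under its name (here `production_az_bucket`), as described in the configuration section below.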
## Configure a destination We recommend to pass the credentials and other required parameters to configuration via TOML files, environment variables or other [config providers](credentials/config_providers.md). This allows you, for example, to easily switch to production destinations after deployment. @@ -59,7 +60,7 @@ For named destinations you use their names in the config section Note that when you use [`dlt init` command](../walkthroughs/add-a-verified-source.md) to create or add a data source, `dlt` creates a sample configuration for selected destination. ### Pass explicit credentials -You can pass credentials explicitly when creating destination class instance. This replaces the `credentials` argument in `dlt.pipeline` and `pipeline.load` methods - which is now deprecated. You can pass the required credentials object, its dictionary representation or the supported native form like below: +You can pass credentials explicitly when creating destination factory instance. This replaces the `credentials` argument in `dlt.pipeline` and `pipeline.load` methods - which is now deprecated. You can pass the required credentials object, its dictionary representation or the supported native form like below: @@ -74,6 +75,12 @@ You can create and pass partial credentials and `dlt` will fill the missing data Please read how to use [various built in credentials types](credentials/config_specs.md). ::: +### Pass additional parameters and change destination capabilities +Destination factory accepts additional parameters that will be used to pre-configure it and change device capabilities. +```py +``` +Example above is overriding `naming_convention` and `recommended_file_size` in the destination capabilities and + ### Configure multiple destinations in a pipeline To configure multiple destinations within a pipeline, you need to provide the credentials for each destination in the "secrets.toml" file. 
This example demonstrates how to configure a BigQuery destination named `destination_one`: diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index bc609fe9b9..a7b1371f9f 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -20,6 +20,7 @@ from dlt.destinations.impl.filesystem.configuration import ( FilesystemDestinationClientConfiguration, ) +from dlt.destinations.impl.filesystem.typing import TExtraPlaceholders from tests.common.storages.utils import assert_sample_files from tests.load.utils import ALL_FILESYSTEM_DRIVERS, AWS_BUCKET from tests.utils import autouse_test_storage @@ -199,7 +200,7 @@ def test_s3_wrong_client_certificate(default_buckets_env: str, self_signed_cert: def test_filesystem_destination_config_reports_unused_placeholders(mocker) -> None: with custom_environ({"DATASET_NAME": "BOBO"}): - extra_placeholders = { + extra_placeholders: TExtraPlaceholders = { "value": 1, "otters": "lab", "dlt": "labs", @@ -211,7 +212,7 @@ def test_filesystem_destination_config_reports_unused_placeholders(mocker) -> No FilesystemDestinationClientConfiguration( bucket_url="file:///tmp/dirbobo", layout="{schema_name}/{table_name}/{otters}-x-{x}/{load_id}.{file_id}.{timestamp}.{ext}", - extra_placeholders=extra_placeholders, # type: ignore + extra_placeholders=extra_placeholders, ) ) logger_spy.assert_called_once_with("Found unused layout placeholders: value, dlt, dlthub") @@ -227,7 +228,7 @@ def test_filesystem_destination_passed_parameters_override_config_values() -> No "DESTINATION__FILESYSTEM__EXTRA_PLACEHOLDERS": json.dumps(config_extra_placeholders), } ): - extra_placeholders = { + extra_placeholders: TExtraPlaceholders = { "new_value": 1, "dlt": "labs", "dlthub": "platform", diff --git a/tests/load/pipeline/test_drop.py b/tests/load/pipeline/test_drop.py index 2f14b6a4b3..7d5f175bb5 100644 --- a/tests/load/pipeline/test_drop.py +++ b/tests/load/pipeline/test_drop.py @@ -21,7 +21,7 @@ def _attach(pipeline: Pipeline) -> Pipeline: - return dlt.attach(pipeline.pipeline_name, pipeline.pipelines_dir, dev_mode=pipeline.dev_mode) + return dlt.attach(pipeline.pipeline_name, pipelines_dir=pipeline.pipelines_dir) @dlt.source(section="droppable", name="droppable") diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 7067a33764..74a7fdaf99 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -14,6 +14,7 @@ from dlt.common.utils import uniq_id from dlt.destinations import filesystem from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +from dlt.destinations.impl.filesystem.typing import TExtraPlaceholders from dlt.pipeline.exceptions import PipelineStepFailed from tests.cases import arrow_table_all_data_types, table_update_and_row, assert_all_data_types_row @@ -499,7 +500,7 @@ def count(*args, **kwargs) -> Any: return count - extra_placeholders = { + extra_placeholders: TExtraPlaceholders = { "who": "marcin", "action": "says", "what": "no potato", diff --git a/tests/load/utils.py b/tests/load/utils.py index 7420abbc9b..0b9026378b 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -182,11 +182,10 @@ def setup_pipeline( ) return pipeline - def attach_pipeline(self, pipeline_name: str, dev_mode: bool = None, **kwargs) -> dlt.Pipeline: + def attach_pipeline(self, pipeline_name: str, **kwargs) -> dlt.Pipeline: """Attach to 
existing pipeline keeping the dev_mode""" # remember dev_mode from setup_pipeline - dev_mode = dev_mode if dev_mode is not None else self.dev_mode - pipeline = dlt.attach(pipeline_name, dev_mode=dev_mode, **kwargs) + pipeline = dlt.attach(pipeline_name, **kwargs) return pipeline diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index be086f1694..b95d351844 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -283,7 +283,9 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: ) ) # attach to existing pipeline - pipeline = dlt.attach(GITHUB_PIPELINE_NAME, credentials=duckdb_cfg.credentials) + pipeline = dlt.attach( + GITHUB_PIPELINE_NAME, destination=duckdb(credentials=duckdb_cfg.credentials) + ) # get the schema from schema storage before we sync github_schema = json.loads( test_storage.load( diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 2d1c345385..31b1514d53 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -207,7 +207,8 @@ def test_pipeline_context() -> None: assert ctx.pipeline() is p3 assert p3.is_active is True assert p2.is_active is False - assert Container()[DestinationCapabilitiesContext].naming_convention == "snake_case" + # no default naming convention + assert Container()[DestinationCapabilitiesContext].naming_convention is None # restore previous p2 = dlt.attach("another pipeline") diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py index 6ffc8c9707..308cdcd91d 100644 --- a/tests/pipeline/test_pipeline_extra.py +++ b/tests/pipeline/test_pipeline_extra.py @@ -55,7 +55,10 @@ def test_create_pipeline_all_destinations(destination_config: DestinationTestCon ) # are capabilities injected caps = p._container[DestinationCapabilitiesContext] - assert p.naming.name() == caps.naming_convention + if caps.naming_convention: + assert p.naming.name() == caps.naming_convention + else: + assert p.naming.name() == "snake_case" p.extract([1, "2", 3], table_name="data") # is default schema with right naming convention diff --git a/tests/sources/helpers/rest_client/test_client.py b/tests/sources/helpers/rest_client/test_client.py index 7196ef3436..aa3f02e51d 100644 --- a/tests/sources/helpers/rest_client/test_client.py +++ b/tests/sources/helpers/rest_client/test_client.py @@ -234,7 +234,6 @@ def test_oauth2_client_credentials_flow_wrong_client_secret(self, rest_client: R assert e.type == HTTPError assert e.match("401 Client Error") - def test_oauth_token_expired_refresh(self, rest_client_immediate_oauth_expiry: RESTClient): rest_client = rest_client_immediate_oauth_expiry auth = cast(OAuth2ClientCredentials, rest_client.auth) From aab36e1e4ac2bdf0c7fe3e962345259884c5fe01 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 25 Jun 2024 19:17:36 +0200 Subject: [PATCH 096/105] updates destination and naming convention docs --- .../custom_destination_bigquery.py | 2 +- .../postgres_to_postgres.py | 6 +- docs/website/blog/2023-09-05-mongo-etl.md | 2 +- docs/website/blog/2023-10-23-arrow-loading.md | 4 +- .../blog/2023-12-01-dlt-kestra-demo.md | 8 +-- .../docs/dlt-ecosystem/destinations/dremio.md | 2 +- .../verified-sources/google_sheets.md | 2 +- .../verified-sources/sql_database.md | 2 +- .../website/docs/general-usage/destination.md | 67 ++++++++++++++++--- .../docs/general-usage/naming-convention.md | 18 +++-- docs/website/docs/general-usage/resource.md | 9 ++- 
.../walkthroughs/create-new-destination.md | 4 ++ docs/website/sidebars.js | 1 + 13 files changed, 94 insertions(+), 33 deletions(-) diff --git a/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py b/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py index 380912a9a7..ce4b2a12d0 100644 --- a/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py +++ b/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py @@ -86,7 +86,7 @@ def bigquery_insert( pipeline_name="csv_to_bigquery_insert", destination=bigquery_insert, dataset_name="mydata", - full_refresh=True, + dev_mode=True, ) load_info = pipeline.run(resource(url=OWID_DISASTERS_URL)) diff --git a/docs/examples/postgres_to_postgres/postgres_to_postgres.py b/docs/examples/postgres_to_postgres/postgres_to_postgres.py index f5327ee236..848af53317 100644 --- a/docs/examples/postgres_to_postgres/postgres_to_postgres.py +++ b/docs/examples/postgres_to_postgres/postgres_to_postgres.py @@ -170,7 +170,7 @@ def table_desc(table_name, pk, schema_name, order_date, columns="*"): pipeline_name=pipeline_name, destination="duckdb", dataset_name=target_schema_name, - full_refresh=True, + dev_mode=True, progress="alive_progress", ) else: @@ -178,8 +178,8 @@ def table_desc(table_name, pk, schema_name, order_date, columns="*"): pipeline_name=pipeline_name, destination="postgres", dataset_name=target_schema_name, - full_refresh=False, - ) # full_refresh=False + dev_mode=False, + ) # dev_mode=False # start timer startTime = pendulum.now() diff --git a/docs/website/blog/2023-09-05-mongo-etl.md b/docs/website/blog/2023-09-05-mongo-etl.md index cd102c8895..8dfd953be4 100644 --- a/docs/website/blog/2023-09-05-mongo-etl.md +++ b/docs/website/blog/2023-09-05-mongo-etl.md @@ -168,7 +168,7 @@ Here's a code explanation of how it works under the hood: pipeline_name='from_json', destination='duckdb', dataset_name='mydata', - full_refresh=True, + dev_mode=True, ) # dlt works with lists of dicts, so wrap data to the list load_info = pipeline.run([data], table_name="json_data") diff --git a/docs/website/blog/2023-10-23-arrow-loading.md b/docs/website/blog/2023-10-23-arrow-loading.md index 2cdf4d90e7..25962c932e 100644 --- a/docs/website/blog/2023-10-23-arrow-loading.md +++ b/docs/website/blog/2023-10-23-arrow-loading.md @@ -50,7 +50,7 @@ chat_messages = dlt.resource( In this demo I just extract and normalize data and skip the loading step. ```py -pipeline = dlt.pipeline(destination="duckdb", full_refresh=True) +pipeline = dlt.pipeline(destination="duckdb", dev_mode=True) # extract first pipeline.extract(chat_messages) info = pipeline.normalize() @@ -98,7 +98,7 @@ chat_messages = dlt.resource( write_disposition="append", )("postgresql://loader:loader@localhost:5432/dlt_data") -pipeline = dlt.pipeline(destination="duckdb", full_refresh=True) +pipeline = dlt.pipeline(destination="duckdb", dev_mode=True) # extract first pipeline.extract(chat_messages) info = pipeline.normalize(workers=3, loader_file_format="parquet") diff --git a/docs/website/blog/2023-12-01-dlt-kestra-demo.md b/docs/website/blog/2023-12-01-dlt-kestra-demo.md index 9f1d7acba2..1b1c79562d 100644 --- a/docs/website/blog/2023-12-01-dlt-kestra-demo.md +++ b/docs/website/blog/2023-12-01-dlt-kestra-demo.md @@ -45,7 +45,7 @@ Wanna jump to the [GitHub repo](https://github.com/dlt-hub/dlt-kestra-demo)? 
## HOW IT WORKS -To lay it all out clearly: Everything's automated in **`Kestra`**, with hassle-free data loading thanks to **`dlt`**, and the analytical thinking handled by OpenAI. Here's a diagram to help you understand the general outline of the entire process. +To lay it all out clearly: Everything's automated in **`Kestra`**, with hassle-free data loading thanks to **`dlt`**, and the analytical thinking handled by OpenAI. Here's a diagram to help you understand the general outline of the entire process. ![overview](https://storage.googleapis.com/dlt-blog-images/dlt_kestra_workflow_overview.png) @@ -59,12 +59,12 @@ Once you’ve opened [http://localhost:8080/](http://localhost:8080/) in your br ![Kestra](https://storage.googleapis.com/dlt-blog-images/dlt_kestra_kestra_ui.png) -Now, all you need to do is [create your flows](https://github.com/dlt-hub/dlt-kestra-demo/blob/main/README.md) and execute them. +Now, all you need to do is [create your flows](https://github.com/dlt-hub/dlt-kestra-demo/blob/main/README.md) and execute them. The great thing about **`Kestra`** is its ease of use - it's UI-based, declarative, and language-agnostic. Unless you're using a task like a [Python script](https://kestra.io/plugins/plugin-script-python/tasks/io.kestra.plugin.scripts.python.script), you don't even need to know how to code. -:::tip +:::tip If you're already considering ways to use **`Kestra`** for your projects, consult their [documentation](https://kestra.io/docs) and the [plugin](https://kestra.io/plugins) pages for further insights. ::: @@ -84,7 +84,7 @@ pipeline = dlt.pipeline( pipeline_name="standard_inbox", destination='bigquery', dataset_name="messages_data", - full_refresh=False, + dev_mode=False, ) # Set table name diff --git a/docs/website/docs/dlt-ecosystem/destinations/dremio.md b/docs/website/docs/dlt-ecosystem/destinations/dremio.md index 546f470938..c087d5dc0a 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/dremio.md +++ b/docs/website/docs/dlt-ecosystem/destinations/dremio.md @@ -86,7 +86,7 @@ Data loading happens by copying a staged parquet files from an object storage bu Dremio does not support `CREATE SCHEMA` DDL statements. -Therefore, "Metastore" data sources, such as Hive or Glue, require that the dataset schema exists prior to running the dlt pipeline. `full_refresh=True` is unsupported for these data sources. +Therefore, "Metastore" data sources, such as Hive or Glue, require that the dataset schema exists prior to running the dlt pipeline. `dev_mode=True` is unsupported for these data sources. "Object Storage" data sources do not have this limitation. diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md index 7b957e98ea..9cd6ad8079 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md @@ -355,7 +355,7 @@ To read more about tables, columns, and datatypes, please refer to [our document `dlt` will **not modify** tables after they are created. So if you changed data types with hints, then you need to **delete the dataset** -or set `full_refresh=True`. +or set `dev_mode=True`. 
::: ## Sources and resources diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md index fde7a64144..36a8569a4a 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md @@ -271,7 +271,7 @@ pipeline = dlt.pipeline( pipeline_name="unsw_download", destination=filesystem(os.path.abspath("../_storage/unsw")), progress="log", - full_refresh=True, + dev_mode=True, ) info = pipeline.run( diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index 842133cfa1..e30cb41a60 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -75,11 +75,22 @@ You can create and pass partial credentials and `dlt` will fill the missing data Please read how to use [various built in credentials types](credentials/config_specs.md). ::: +### Inspect destination capabilities +[Destination capabilities](../walkthroughs/create-new-destination.md#3-set-the-destination-capabilities) tell `dlt` what given destination can and cannot do. For example it tells which file formats it can load, what is maximum query or identifier length. Inspect destination capabilities as follows: +```py +import dlt +pipeline = dlt.pipeline("snowflake_test", destination="snowflake") +print(dict(pipeline.destination.capabilities())) +``` + ### Pass additional parameters and change destination capabilities -Destination factory accepts additional parameters that will be used to pre-configure it and change device capabilities. +Destination factory accepts additional parameters that will be used to pre-configure it and change destination capabilities. ```py +import dlt +duck_ = dlt.destinations.duckdb(naming_convention="duck_case", recommended_file_size=120000) +print(dict(duck_.capabilities())) ``` -Example above is overriding `naming_convention` and `recommended_file_size` in the destination capabilities and +Example above is overriding `naming_convention` and `recommended_file_size` in the destination capabilities. ### Configure multiple destinations in a pipeline To configure multiple destinations within a pipeline, you need to provide the credentials for each destination in the "secrets.toml" file. This example demonstrates how to configure a BigQuery destination named `destination_one`: @@ -124,17 +135,55 @@ Obviously, dlt will access the destination when you instantiate [sql_client](../ ::: -## Control how dlt creates table, column and other identifiers +## Control how `dlt` creates table, column and other identifiers +`dlt` maps identifiers found in the source data into destination identifiers (ie. table and columns names) using [naming conventions](naming-convention.md) which ensure that +character set, identifier length and other properties fit into what given destination can handle. For example our [default naming convention (**snake case**)](naming-convention.md#default-naming-convention-snake_case) converts all names in the source (ie. JSON document fields) into snake case, case insensitive identifiers. + +Each destination declares its preferred naming convention, support for case sensitive identifiers and case folding function that case insensitive identifiers follow. For example: +1. Redshift - by default does not support case sensitive identifiers and converts all of them to lower case. +2. 
Snowflake - supports case sensitive identifiers and considers upper cased identifiers as case insensitive (which is the default case folding) +3. DuckDb - does not support case sensitive identifiers but does not case fold them so it preserves the original casing in the information schema. +4. Athena - does not support case sensitive identifiers and converts all of them to lower case. +5. BigQuery - all identifiers are case sensitive, there's no case insensitive mode available via case folding (but it can be enabled in dataset level). + +You can change the naming convention used in [many different ways](naming-convention.md#configure-naming-convention), below we set the preferred naming convention on the Snowflake destination to `sql_cs` to switch Snowflake to case sensitive mode: +```py +import dlt +snow_ = dlt.destinations.snowflake(naming_convention="sql_cs_v1") +``` +Setting naming convention will impact all new schemas being created (ie. on first pipeline run) and will re-normalize all existing identifiers. + +:::caution +`dlt` prevents re-normalization of identifiers in tables that were already created at the destination. Use [refresh](pipeline.md#refresh-pipeline-data-and-state) mode to drop the data. You can also disable this behavior via [configuration](naming-convention.md#avoid-identifier-clashes) +::: + +:::note +Destinations that support case sensitive identifiers but use case folding convention to enable case insensitive identifiers are configured in case insensitive mode by default. Examples: Postgres, Snowflake, Oracle. +::: -- case folding -- case sensitivity +:::caution +If you use case sensitive naming convention with case insensitive destination, `dlt` will: +1. Fail the load if it detects identifier clash due to case folding +2. Warn if any case folding is applied by the destination. +::: -(TODO) -1. Redshift - always lower case, no matter which naming convention used. case insensitive -2. Athena - always lower case, no matter which naming convention used. uses different catalogue and query engines that are incompatible +### Enable case sensitive identifiers support +Selected destinations may be configured so they start accepting case sensitive identifiers. For example, it is possible to set case sensitive collation on **mssql** database and then tell `dlt` about it. +```py +from dlt.destinations import mssql +dest_ = mssql(has_case_sensitive_identifiers=True, naming_convention="sql_cs_v1") +``` +Above we can safely use case sensitive naming convention without worrying of name clashes. -### Enable case sensitive mode +You can configure the case sensitivity, **but configuring destination capabilities is not currently supported**. +```toml +[destination.mssql] +has_case_sensitive_identifiers=true +``` +:::note +In most cases setting the flag above just indicates to `dlt` that you switched the case sensitive option on a destination. `dlt` will not do that for you. Refer to destination documentation for details. 
+::: ## Create new destination You have two ways to implement a new destination: diff --git a/docs/website/docs/general-usage/naming-convention.md b/docs/website/docs/general-usage/naming-convention.md index 71e6b2489c..7802e80985 100644 --- a/docs/website/docs/general-usage/naming-convention.md +++ b/docs/website/docs/general-usage/naming-convention.md @@ -1,18 +1,24 @@ --- title: Naming Convention description: Control how dlt creates table, column and other identifiers -keywords: [identifiers, snake case, ] +keywords: [identifiers, snake case, case sensitive, case insensitive, naming] --- # Naming Convention -`dlt` creates tables, child tables and column schemas from the data. The data being loaded, +`dlt` creates tables, child tables and column identifiers from the data. The data source, typically JSON documents, contains identifiers (i.e. key names in a dictionary) with any Unicode -characters, any lengths and naming styles. On the other hand the destinations accept very strict +characters, any lengths and naming styles. On the other hand destinations accept very strict namespaces for their identifiers. Like [Redshift](../dlt-ecosystem/destinations/redshift.md#naming-convention) that accepts case-insensitive alphanumeric identifiers with maximum 127 characters. -Each schema contains `naming convention` that tells `dlt` how to translate identifiers to the -namespace that the destination understands. +`dlt` groups tables belonging to [resources](resource.md) from a single [source](source.md) in a [schema](schema.md). + +Each schema contains **naming convention** that tells `dlt` how to translate identifiers to the +namespace that the destination understands. Naming conventions are in essence functions translating strings from the source identifier format into destination identifier format. For example our **snake_case** (default) naming convention will translate `DealFlow` into `deal_flow` identifier. + +You have control over which naming convention to use and dlt provides a few to choose from ie. `sql_cs_v1` + + * Each destination has a preferred naming convention. * This naming convention is used when new schemas are created. @@ -51,6 +57,8 @@ The naming convention is configurable and users can easily create their own conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e. DuckDB). +## Avoid identifier clashes + ## Available naming conventions ## Write your own naming convention diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index e09e7294de..14f8d73b58 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -514,9 +514,8 @@ def orders(items: Iterator[FileItemDict]): dest_file = os.path.join(import_folder, item["file_name"]) # download file item.fsspec.download(item["file_url"], dest_file) - # tell dlt to import the file, mind that `item` below will not be - # saved, dest_file will be imported instead - yield dlt.mark.with_file_import(item, dest_file, "csv") + # tell dlt to import the dest_file as `csv` + yield dlt.mark.with_file_import(dest_file, "csv") # use filesystem verified source to glob a bucket @@ -536,11 +535,11 @@ include_header=false on_error_continue=true ``` -You can sniff the schema from the data ie. using `duckdb` to infer the table schema from csv file. `dlt.mark.with_file_import` accepts additional arguments that you can use to pass hints at run time. +You can sniff the schema from the data ie. 
using `duckdb` to infer the table schema from a `csv` file. `dlt.mark.with_file_import` accepts additional arguments that you can use to pass hints at run time. :::note * If you do not define any columns, the table will not be created in the destination. `dlt` will still attempt to load data into it, so if you create a fitting table upfront, the load process will succeed. -* Files are imported using hard links if possible. +* Files are imported using hard links if possible to avoid copying and duplicating storage space needed. ::: ### Duplicate and rename resources diff --git a/docs/website/docs/walkthroughs/create-new-destination.md b/docs/website/docs/walkthroughs/create-new-destination.md index 1b72b81e3e..69e7b2fcc1 100644 --- a/docs/website/docs/walkthroughs/create-new-destination.md +++ b/docs/website/docs/walkthroughs/create-new-destination.md @@ -88,6 +88,10 @@ The default `escape_identifier` function identifier escapes `"` and '\' and quot You should avoid providing a custom `escape_literal` function by not enabling `insert-values` for your destination. +### Enable / disable case sensitive identifiers +Specify if destination supports case sensitive identifiers by setting `has_case_sensitive_identifiers` to `True` (or `False` otherwise). Some case sensitive destinations (ie. **Snowflake** or **Postgres**) support case insensitive identifiers via case folding, ie. **Snowflake** considers all upper case identifiers as case insensitive (set `casefold_identifier` to `str.upper`), **Postgres** does the same with lower case identifiers (`str.lower`). +Some case insensitive destinations (ie. **Athena** or **Redshift**) case-fold (ie. lower case) all identifiers and store them as such. In that case set `casefold_identifier` to `str.lower` as well. + ## 4. Adjust the SQL client **sql client** is a wrapper over `dbapi` and its main role is to provide consistent interface for executing SQL statements, managing transactions and (probably the most important) to help handling errors via classifying exceptions.
Here's a few things you should pay attention to: diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index d3d7def8fc..1ea92f2e91 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -157,6 +157,7 @@ const sidebars = { 'general-usage/incremental-loading', 'general-usage/full-loading', 'general-usage/schema', + 'general-usage/naming-convention', 'general-usage/schema-contracts', 'general-usage/schema-evolution', { From 7294aae17077c60b016c6b4e051d2d1cc33521fe Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 14:54:48 +0200 Subject: [PATCH 097/105] removes is_case_sensitive from naming convention initializer --- dlt/common/normalizers/naming/duck_case.py | 5 +++-- dlt/common/normalizers/naming/naming.py | 8 ++++---- dlt/common/normalizers/naming/snake_case.py | 8 ++++++-- dlt/common/normalizers/naming/sql_ci_v1.py | 12 ++++++++++++ dlt/destinations/impl/weaviate/ci_naming.py | 6 ++++-- dlt/destinations/impl/weaviate/naming.py | 5 +++-- 6 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 dlt/common/normalizers/naming/sql_ci_v1.py diff --git a/dlt/common/normalizers/naming/duck_case.py b/dlt/common/normalizers/naming/duck_case.py index 295e607f46..77c41e0e43 100644 --- a/dlt/common/normalizers/naming/duck_case.py +++ b/dlt/common/normalizers/naming/duck_case.py @@ -7,11 +7,12 @@ class NamingConvention(SnakeCaseNamingConvention): _CLEANUP_TABLE = str.maketrans('\n\r"', "___") - def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: + def __init__(self, max_length: int = None) -> None: """Case sensitive naming convention preserving all unicode characters except new line(s). Uses __ for path separation and will replace multiple underscores with a single one. """ - super().__init__(max_length, is_case_sensitive) + super().__init__(max_length) + self.is_case_sensitive = True @staticmethod @lru_cache(maxsize=None) diff --git a/dlt/common/normalizers/naming/naming.py b/dlt/common/normalizers/naming/naming.py index eaf871d66d..b806f11eec 100644 --- a/dlt/common/normalizers/naming/naming.py +++ b/dlt/common/normalizers/naming/naming.py @@ -10,12 +10,12 @@ class NamingConvention(ABC): _TR_TABLE = bytes.maketrans(b"/+", b"ab") _DEFAULT_COLLISION_PROB = 0.001 - def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: - """Initializes naming convention producing identifiers with `max_length` and transforming input - in case sensitive or case insensitive manner. + def __init__(self, max_length: int = None) -> None: + """Initializes naming convention to generate identifier with `max_length` if specified. 
Base naming convention + is case sensitive by default """ self.max_length = max_length - self.is_case_sensitive = is_case_sensitive + self.is_case_sensitive = True @abstractmethod def normalize_identifier(self, identifier: str) -> str: diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py index ffa0bf6968..f4aa1c25e2 100644 --- a/dlt/common/normalizers/naming/snake_case.py +++ b/dlt/common/normalizers/naming/snake_case.py @@ -18,8 +18,12 @@ class NamingConvention(BaseNamingConvention): # subsequent nested fields will be separated with the string below, applies both to field and table names PATH_SEPARATOR = "__" - def __init__(self, max_length: int = None, is_case_sensitive: bool = False) -> None: - super().__init__(max_length, is_case_sensitive) + def __init__(self, max_length: int = None) -> None: + """Case insensitive naming convention, converting source identifiers into snake case. Uses __ as path separator. + Multiple underscores are contracted to one. + """ + super().__init__(max_length) + self.is_case_sensitive = False def normalize_identifier(self, identifier: str) -> str: identifier = super().normalize_identifier(identifier) diff --git a/dlt/common/normalizers/naming/sql_ci_v1.py b/dlt/common/normalizers/naming/sql_ci_v1.py new file mode 100644 index 0000000000..baabb7ecf7 --- /dev/null +++ b/dlt/common/normalizers/naming/sql_ci_v1.py @@ -0,0 +1,12 @@ +from dlt.common.normalizers.naming.sql_cs_v1 import NamingConvention as SqlCsNamingConvention + + +class NamingConvention(SqlCsNamingConvention): + def __init__(self, max_length: int = None) -> None: + """A variant of sql_cs which lower cases all identifiers.""" + + super().__init__(max_length) + self.is_case_sensitive = False + + def normalize_identifier(self, identifier: str) -> str: + return super().normalize_identifier(identifier).lower() diff --git a/dlt/destinations/impl/weaviate/ci_naming.py b/dlt/destinations/impl/weaviate/ci_naming.py index ab4864f9b0..63c94776ad 100644 --- a/dlt/destinations/impl/weaviate/ci_naming.py +++ b/dlt/destinations/impl/weaviate/ci_naming.py @@ -2,8 +2,10 @@ class NamingConvention(WeaviateNamingConvention): - def __init__(self, max_length: int = None, is_case_sensitive: bool = False) -> None: - super().__init__(max_length, is_case_sensitive) + def __init__(self, max_length: int = None) -> None: + """Case insensitive naming convention for Weaviate. 
Lower cases all identifiers""" + super().__init__(max_length) + self.is_case_sensitive = False def _lowercase_property(self, identifier: str) -> str: """Lowercase the whole property to become case insensitive""" diff --git a/dlt/destinations/impl/weaviate/naming.py b/dlt/destinations/impl/weaviate/naming.py index 837553d29b..1e8e73a8e1 100644 --- a/dlt/destinations/impl/weaviate/naming.py +++ b/dlt/destinations/impl/weaviate/naming.py @@ -7,8 +7,9 @@ class NamingConvention(SnakeCaseNamingConvention): """Normalizes identifiers according to Weaviate documentation: https://weaviate.io/developers/weaviate/config-refs/schema#class""" - def __init__(self, max_length: int = None, is_case_sensitive: bool = True) -> None: - super().__init__(max_length, is_case_sensitive) + def __init__(self, max_length: int = None) -> None: + super().__init__(max_length) + self.is_case_sensitive: bool = True RESERVED_PROPERTIES = {"id": "__id", "_id": "___id", "_additional": "__additional"} _RE_UNDERSCORES = re.compile("([^_])__+") From dc10473e5d750a3ae4e9025a01a6f892a1a54fc3 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 14:59:17 +0200 Subject: [PATCH 098/105] simplifies with_file_import mark --- dlt/extract/extractors.py | 22 +++++++++++++++------- dlt/pipeline/mark.py | 1 - tests/load/pipeline/test_csv_loading.py | 12 ++++++------ tests/pipeline/test_arrow_sources.py | 4 ++-- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 1cf45271eb..4a1de2517d 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -1,5 +1,5 @@ from copy import copy -from typing import Set, Dict, Any, Optional, List +from typing import Set, Dict, Any, Optional, List, Union from dlt.common.configuration import known_sections, resolve_configuration, with_config from dlt.common import logger @@ -65,21 +65,29 @@ def __init__( def with_file_import( - item: TDataItems, file_path: str, - file_format: TLoaderFileFormat = None, + file_format: TLoaderFileFormat, items_count: int = 0, - hints: TResourceHints = None, + hints: Union[TResourceHints, TDataItem] = None, ) -> DataItemWithMeta: - """Marks `item` to correspond to a file under `file_path` which will be imported into extract storage. `item` may be used - for a schema inference (from arrow table / pandas) but it will not be saved into storage. + """Marks file under `file_path` to be associated with current resource and imported into the load package as a file of + type `file_format`. You can provide optional `hints` that will be applied to the current resource. Note that you should avoid schema inference at - runtime if possible and if that is not possible - to do that only once per extract process. Create `TResourceHints` with `make_hints`. + runtime if possible and if that is not possible - to do that only once per extract process. Use `make_hints` in `mark` module + to create hints. You can also pass Arrow table or Pandas data frame from which schema will be taken (but content discarded). + Create `TResourceHints` with `make_hints`. If number of records in `file_path` is known, pass it in `items_count` so `dlt` can generate correct extract metrics. + + Note that `dlt` does not sniff schemas from data and will not guess the right file format for you.
""" metrics = DataWriterMetrics(file_path, items_count, 0, 0, 0) + item: TDataItem = None + # if hints are dict assume that this is dlt schema, if not - that it is arrow table + if not isinstance(hints, dict): + item = hints + hints = None return DataItemWithMeta(ImportFileMeta(file_path, metrics, file_format, hints, False), item) diff --git a/dlt/pipeline/mark.py b/dlt/pipeline/mark.py index aae77d0b3f..5f3122e7a5 100644 --- a/dlt/pipeline/mark.py +++ b/dlt/pipeline/mark.py @@ -4,6 +4,5 @@ with_hints, with_file_import, make_hints, - with_file_import, materialize_schema_item as materialize_table_schema, ) diff --git a/tests/load/pipeline/test_csv_loading.py b/tests/load/pipeline/test_csv_loading.py index b1054ae798..6a2be2eb40 100644 --- a/tests/load/pipeline/test_csv_loading.py +++ b/tests/load/pipeline/test_csv_loading.py @@ -29,7 +29,7 @@ def test_load_csv( destination_config: DestinationTestConfiguration, item_type: TestDataItemFormat ) -> None: os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), dev_mode=True) # do not save state so the state job is not created pipeline.config.restore_from_destination = False @@ -69,7 +69,7 @@ def test_custom_csv_no_header( os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = str(not compression) csv_format = CsvFormatConfiguration(delimiter="|", include_header=False) # apply to collected config - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), dev_mode=True) # this will apply this to config when client instance is created pipeline.destination.config_params["csv_format"] = csv_format # verify @@ -88,7 +88,7 @@ def test_custom_csv_no_header( if compression: import_file += ".gz" info = pipeline.run( - [dlt.mark.with_file_import(None, import_file, "csv", 2, hints)], + [dlt.mark.with_file_import(import_file, "csv", 2, hints=hints)], table_name="no_header", loader_file_format=file_format, ) @@ -116,7 +116,7 @@ def test_custom_csv_no_header( def test_custom_wrong_header(destination_config: DestinationTestConfiguration) -> None: csv_format = CsvFormatConfiguration(delimiter="|", include_header=True) # apply to collected config - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), dev_mode=True) # this will apply this to config when client instance is created pipeline.destination.config_params["csv_format"] = csv_format # verify @@ -134,7 +134,7 @@ def test_custom_wrong_header(destination_config: DestinationTestConfiguration) - import_file = "tests/load/cases/loading/csv_header.csv" # snowflake will pass here because we do not match info = pipeline.run( - [dlt.mark.with_file_import(None, import_file, "csv", 2, hints)], + [dlt.mark.with_file_import(import_file, "csv", 2, hints=hints)], table_name="no_header", ) assert info.has_failed_jobs @@ -149,7 +149,7 @@ def test_custom_wrong_header(destination_config: DestinationTestConfiguration) - def test_empty_csv_from_arrow(destination_config: DestinationTestConfiguration) -> None: os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" os.environ["RESTORE_FROM_DESTINATION"] = "False" - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("postgres_" + 
uniq_id(), dev_mode=True) table, _, _ = arrow_table_all_data_types("arrow-table", include_json=False) load_info = pipeline.run( diff --git a/tests/pipeline/test_arrow_sources.py b/tests/pipeline/test_arrow_sources.py index ff046e70a7..bcaa19479c 100644 --- a/tests/pipeline/test_arrow_sources.py +++ b/tests/pipeline/test_arrow_sources.py @@ -511,7 +511,7 @@ def test_import_file_with_arrow_schema() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) # Define the schema based on the CSV input @@ -540,7 +540,7 @@ def test_import_file_with_arrow_schema() -> None: # columns should be created from empty table import_file = "tests/load/cases/loading/header.jsonl" info = pipeline.run( - [dlt.mark.with_file_import(empty_table, import_file, "jsonl", 2)], + [dlt.mark.with_file_import(import_file, "jsonl", 2, hints=empty_table)], table_name="no_header", ) info.raise_on_failed_jobs() From 727a35ea981f0c15bf63ca85d3103689162eb913 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 14:59:52 +0200 Subject: [PATCH 099/105] adds case sensitivity tests --- .../load/pipeline/test_snowflake_pipeline.py | 55 ++++++++++++++ tests/pipeline/test_pipeline.py | 73 ++++++++++++++++--- 2 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 tests/load/pipeline/test_snowflake_pipeline.py diff --git a/tests/load/pipeline/test_snowflake_pipeline.py b/tests/load/pipeline/test_snowflake_pipeline.py new file mode 100644 index 0000000000..3cfa9e8b21 --- /dev/null +++ b/tests/load/pipeline/test_snowflake_pipeline.py @@ -0,0 +1,55 @@ +import pytest + +import dlt +from dlt.common import Decimal + +from dlt.common.utils import uniq_id +from dlt.destinations.exceptions import DatabaseUndefinedRelation +from tests.pipeline.utils import assert_load_info +from tests.load.utils import destinations_configs, DestinationTestConfiguration + +# mark all tests as essential, do not remove +pytestmark = pytest.mark.essential + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["snowflake"]), + ids=lambda x: x.name, +) +def test_snowflake_case_sensitive_identifiers( + destination_config: DestinationTestConfiguration, +) -> None: + snow_ = dlt.destinations.snowflake(naming_convention="sql_cs_v1") + + dataset_name = "CaseSensitive_Dataset_" + uniq_id() + pipeline = destination_config.setup_pipeline( + "test_snowflake_case_sensitive_identifiers", dataset_name=dataset_name, destination=snow_ + ) + caps = pipeline.destination.capabilities() + assert caps.naming_convention == "sql_cs_v1" + + destination_client = pipeline.destination_client() + # assert snowflake caps to be in case sensitive mode + assert destination_client.capabilities.casefold_identifier is str + + # load some case sensitive data + info = pipeline.run([{"Id": 1, "Capital": 0.0}], table_name="Expenses") + assert_load_info(info) + with pipeline.sql_client() as client: + assert client.has_dataset() + # use the same case sensitive dataset + with client.with_alternative_dataset_name(dataset_name): + assert client.has_dataset() + # make it case insensitive (upper) + with client.with_alternative_dataset_name(dataset_name.upper()): + assert not client.has_dataset() + # keep case sensitive but make lowercase + with client.with_alternative_dataset_name(dataset_name.lower()): + assert not client.has_dataset() + + # must use quoted identifiers + rows = client.execute_sql('SELECT "Id", "Capital" FROM "Expenses"') + print(rows) + 
with pytest.raises(DatabaseUndefinedRelation): + client.execute_sql('SELECT "Id", "Capital" FROM Expenses') diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 31b1514d53..cfc41cb43b 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -33,6 +33,7 @@ from dlt.common.exceptions import PipelineStateNotAvailable from dlt.common.pipeline import LoadInfo, PipelineContext from dlt.common.runtime.collector import LogCollector +from dlt.common.schema.exceptions import TableIdentifiersFrozen from dlt.common.schema.typing import TColumnSchema from dlt.common.schema.utils import new_column, new_table from dlt.common.typing import DictStrAny @@ -2252,7 +2253,7 @@ def test_data(): pipeline = dlt.pipeline( pipeline_name="test_staging_cleared", destination="duckdb", - full_refresh=True, + dev_mode=True, ) info = pipeline.run(test_data, table_name="staging_cleared") @@ -2271,11 +2272,59 @@ def test_data(): assert len(cur.fetchall()) == 3 +def test_change_naming_convention_name_clash() -> None: + duck_ = dlt.destinations.duckdb(naming_convention="duck_case", recommended_file_size=120000) + caps = duck_.capabilities() + assert caps.naming_convention == "duck_case" + assert caps.recommended_file_size == 120000 + + # use duck case to load data into duckdb so casing and emoji are preserved + pipeline = dlt.pipeline("test_change_naming_convention_name_clash", destination=duck_) + info = pipeline.run( + airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock") + ) + assert_load_info(info) + # make sure that emojis got in + assert "🦚Peacock" in pipeline.default_schema.tables + assert "🔑id" in pipeline.default_schema.tables["🦚Peacock"]["columns"] + assert load_data_table_counts(pipeline) == { + "📆 Schedule": 3, + "🦚Peacock": 1, + "🦚WidePeacock": 1, + "🦚Peacock__peacock": 3, + "🦚WidePeacock__Peacock": 3, + } + with pipeline.sql_client() as client: + rows = client.execute_sql("SELECT 🔑id FROM 🦚Peacock") + # 🔑id value is 1 + assert rows[0][0] == 1 + + # change naming convention and run pipeline again so we generate name clashes + os.environ["SOURCES__AIRTABLE_EMOJIS__SCHEMA__NAMING"] = "sql_ci_v1" + with pytest.raises(PipelineStepFailed) as pip_ex: + pipeline.run(airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock")) + assert isinstance(pip_ex.value.__cause__, TableIdentifiersFrozen) + + # all good if we drop tables + # info = pipeline.run( + # airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock"), + # refresh="drop_resources", + # ) + # assert_load_info(info) + # assert load_data_table_counts(pipeline) == { + # "📆 Schedule": 3, + # "🦚Peacock": 1, + # "🦚WidePeacock": 1, + # "🦚Peacock__peacock": 3, + # "🦚WidePeacock__Peacock": 3, + # } + + def test_import_jsonl_file() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) columns: List[TColumnSchema] = [ {"name": "id", "data_type": "bigint", "nullable": False}, @@ -2286,7 +2335,7 @@ def test_import_jsonl_file() -> None: ] import_file = "tests/load/cases/loading/header.jsonl" info = pipeline.run( - [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + [dlt.mark.with_file_import(import_file, "jsonl", 2)], table_name="no_header", loader_file_format="jsonl", columns=columns, @@ -2298,7 +2347,7 @@ def test_import_jsonl_file() -> None: # use hints to infer hints = dlt.mark.make_hints(columns=columns) info = pipeline.run( - 
[dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2, hints=hints)], + [dlt.mark.with_file_import(import_file, "jsonl", 2, hints=hints)], table_name="no_header_2", ) info.raise_on_failed_jobs() @@ -2309,11 +2358,11 @@ def test_import_file_without_sniff_schema() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) import_file = "tests/load/cases/loading/header.jsonl" info = pipeline.run( - [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + [dlt.mark.with_file_import(import_file, "jsonl", 2)], table_name="no_header", ) assert info.has_failed_jobs @@ -2324,13 +2373,13 @@ def test_import_non_existing_file() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) # this file does not exist import_file = "tests/load/cases/loading/X_header.jsonl" with pytest.raises(PipelineStepFailed) as pip_ex: pipeline.run( - [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "jsonl", 2)], + [dlt.mark.with_file_import(import_file, "jsonl", 2)], table_name="no_header", ) inner_ex = pip_ex.value.__cause__ @@ -2342,13 +2391,13 @@ def test_import_unsupported_file_format() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) # this file does not exist import_file = "tests/load/cases/loading/csv_no_header.csv" with pytest.raises(PipelineStepFailed) as pip_ex: pipeline.run( - [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "csv", 2)], + [dlt.mark.with_file_import(import_file, "csv", 2)], table_name="no_header", ) inner_ex = pip_ex.value.__cause__ @@ -2360,13 +2409,13 @@ def test_import_unknown_file_format() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", destination="duckdb", - full_refresh=True, + dev_mode=True, ) # this file does not exist import_file = "tests/load/cases/loading/csv_no_header.csv" with pytest.raises(PipelineStepFailed) as pip_ex: pipeline.run( - [dlt.mark.with_file_import([{"id": "IGNORED"}], import_file, "unknown", 2)], # type: ignore[arg-type] + [dlt.mark.with_file_import(import_file, "unknown", 2)], # type: ignore[arg-type] table_name="no_header", ) inner_ex = pip_ex.value.__cause__ From 4cb2646d7cf99e751b21d04887a5ad6881a771b5 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 15:00:39 +0200 Subject: [PATCH 100/105] uses dev_mode everywhere --- tests/destinations/test_custom_destination.py | 50 +++++++++---------- .../athena_iceberg/test_athena_adapter.py | 2 +- .../test_bigquery_streaming_insert.py | 4 +- .../bigquery/test_bigquery_table_builder.py | 2 +- .../clickhouse/test_clickhouse_adapter.py | 2 +- .../test_clickhouse_gcs_s3_compatibility.py | 2 +- tests/load/pipeline/test_athena.py | 4 +- tests/load/pipeline/test_clickhouse.py | 2 +- tests/load/pipeline/test_dremio.py | 4 +- tests/load/pipeline/test_drop.py | 2 +- .../load/pipeline/test_filesystem_pipeline.py | 2 +- tests/load/pipeline/test_merge_disposition.py | 2 +- tests/load/pipeline/test_parallelism.py | 2 +- tests/load/pipeline/test_postgres.py | 2 +- tests/load/pipeline/test_scd2.py | 14 +++--- tests/load/utils.py | 6 +-- tests/normalize/test_max_nesting.py | 4 +- tests/pipeline/test_import_export_schema.py | 4 +- tests/pipeline/utils.py | 2 +- 19 files changed, 54 insertions(+), 58 deletions(-) diff --git a/tests/destinations/test_custom_destination.py b/tests/destinations/test_custom_destination.py 
index 1d1bb3d96c..6ebf7f6ef3 100644 --- a/tests/destinations/test_custom_destination.py +++ b/tests/destinations/test_custom_destination.py @@ -56,7 +56,7 @@ def items_resource() -> TDataItems: nonlocal items yield items - p = dlt.pipeline("sink_test", destination=test_sink, full_refresh=True) + p = dlt.pipeline("sink_test", destination=test_sink, dev_mode=True) p.run([items_resource()]) return calls @@ -169,7 +169,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va # test decorator calls = [] - p = dlt.pipeline("sink_test", destination=dlt.destination()(local_sink_func), full_refresh=True) + p = dlt.pipeline("sink_test", destination=dlt.destination()(local_sink_func), dev_mode=True) p.run([1, 2, 3], table_name="items") assert len(calls) == 1 # local func does not create entry in destinations @@ -180,7 +180,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va p = dlt.pipeline( "sink_test", destination=Destination.from_reference("destination", destination_callable=local_sink_func), - full_refresh=True, + dev_mode=True, ) p.run([1, 2, 3], table_name="items") assert len(calls) == 1 @@ -196,7 +196,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va "destination", destination_callable="tests.destinations.test_custom_destination.global_sink_func", ), - full_refresh=True, + dev_mode=True, ) p.run([1, 2, 3], table_name="items") assert len(global_calls) == 1 @@ -211,7 +211,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va p = dlt.pipeline( "sink_test", destination=Destination.from_reference("destination", destination_callable=None), - full_refresh=True, + dev_mode=True, ) with pytest.raises(ConfigurationValueError): p.run([1, 2, 3], table_name="items") @@ -223,7 +223,7 @@ def local_sink_func(items: TDataItems, table: TTableSchema, my_val=dlt.config.va destination=Destination.from_reference( "destination", destination_callable="does.not.exist" ), - full_refresh=True, + dev_mode=True, ) # using decorator without args will also work @@ -235,7 +235,7 @@ def simple_decorator_sink(items, table, my_val=dlt.config.value): assert my_val == "something" calls.append((items, table)) - p = dlt.pipeline("sink_test", destination=simple_decorator_sink, full_refresh=True) # type: ignore + p = dlt.pipeline("sink_test", destination=simple_decorator_sink, dev_mode=True) # type: ignore p.run([1, 2, 3], table_name="items") assert len(calls) == 1 @@ -294,7 +294,7 @@ def assert_items_in_range(c: List[TDataItems], start: int, end: int) -> None: assert str(i) in collected_items # no errors are set, all items should be processed - p = dlt.pipeline("sink_test", destination=test_sink, full_refresh=True) + p = dlt.pipeline("sink_test", destination=test_sink, dev_mode=True) load_id = p.run([items(), items2()]).loads_ids[0] assert_items_in_range(calls["items"], 0, 100) assert_items_in_range(calls["items2"], 0, 100) @@ -307,7 +307,7 @@ def assert_items_in_range(c: List[TDataItems], start: int, end: int) -> None: # provoke errors calls = {} provoke_error = {"items": 25, "items2": 45} - p = dlt.pipeline("sink_test", destination=test_sink, full_refresh=True) + p = dlt.pipeline("sink_test", destination=test_sink, dev_mode=True) with pytest.raises(PipelineStepFailed): p.run([items(), items2()]) @@ -364,7 +364,7 @@ def snake_sink(items, table): assert table["columns"]["snake_case"]["name"] == "snake_case" assert table["columns"]["camel_case"]["name"] == "camel_case" - dlt.pipeline("sink_test", 
destination=snake_sink, full_refresh=True).run(resource()) + dlt.pipeline("sink_test", destination=snake_sink, dev_mode=True).run(resource()) # check default (which is direct) @dlt.destination() @@ -374,7 +374,7 @@ def direct_sink(items, table): assert table["columns"]["snake_case"]["name"] == "snake_case" assert table["columns"]["camelCase"]["name"] == "camelCase" - dlt.pipeline("sink_test", destination=direct_sink, full_refresh=True).run(resource()) + dlt.pipeline("sink_test", destination=direct_sink, dev_mode=True).run(resource()) def test_file_batch() -> None: @@ -397,7 +397,7 @@ def direct_sink(file_path, table): with pyarrow.parquet.ParquetFile(file_path) as reader: assert reader.metadata.num_rows == (100 if table["name"] == "person" else 50) - dlt.pipeline("sink_test", destination=direct_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=direct_sink, dev_mode=True).run( [resource1(), resource2()] ) @@ -413,25 +413,23 @@ def my_sink(file_path, table, my_val=dlt.config.value): # if no value is present, it should raise with pytest.raises(ConfigFieldMissingException): - dlt.pipeline("sink_test", destination=my_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=my_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) # we may give the value via __callable__ function - dlt.pipeline("sink_test", destination=my_sink(my_val="something"), full_refresh=True).run( + dlt.pipeline("sink_test", destination=my_sink(my_val="something"), dev_mode=True).run( [1, 2, 3], table_name="items" ) # right value will pass os.environ["DESTINATION__MY_SINK__MY_VAL"] = "something" - dlt.pipeline("sink_test", destination=my_sink, full_refresh=True).run( - [1, 2, 3], table_name="items" - ) + dlt.pipeline("sink_test", destination=my_sink, dev_mode=True).run([1, 2, 3], table_name="items") # wrong value will raise os.environ["DESTINATION__MY_SINK__MY_VAL"] = "wrong" with pytest.raises(PipelineStepFailed): - dlt.pipeline("sink_test", destination=my_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=my_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) @@ -442,13 +440,13 @@ def other_sink(file_path, table, my_val=dlt.config.value): # if no value is present, it should raise with pytest.raises(ConfigFieldMissingException): - dlt.pipeline("sink_test", destination=other_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=other_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) # right value will pass os.environ["DESTINATION__SOME_NAME__MY_VAL"] = "something" - dlt.pipeline("sink_test", destination=other_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=other_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) @@ -466,7 +464,7 @@ def my_gcp_sink( # missing spec with pytest.raises(ConfigFieldMissingException): - dlt.pipeline("sink_test", destination=my_gcp_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=my_gcp_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) @@ -476,7 +474,7 @@ def my_gcp_sink( os.environ["CREDENTIALS__USERNAME"] = "my_user_name" # now it will run - dlt.pipeline("sink_test", destination=my_gcp_sink, full_refresh=True).run( + dlt.pipeline("sink_test", destination=my_gcp_sink, dev_mode=True).run( [1, 2, 3], table_name="items" ) @@ -500,14 +498,14 @@ def sink_func_with_spec( # call fails because `my_predefined_val` is required part of spec, even if not injected with pytest.raises(ConfigFieldMissingException): - info = dlt.pipeline("sink_test", 
destination=sink_func_with_spec(), full_refresh=True).run( + info = dlt.pipeline("sink_test", destination=sink_func_with_spec(), dev_mode=True).run( [1, 2, 3], table_name="items" ) info.raise_on_failed_jobs() # call happens now os.environ["MY_PREDEFINED_VAL"] = "VAL" - info = dlt.pipeline("sink_test", destination=sink_func_with_spec(), full_refresh=True).run( + info = dlt.pipeline("sink_test", destination=sink_func_with_spec(), dev_mode=True).run( [1, 2, 3], table_name="items" ) info.raise_on_failed_jobs() @@ -579,7 +577,7 @@ def test_sink(items, table): found_dlt_column_value = True # test with and without removing - p = dlt.pipeline("sink_test", destination=test_sink, full_refresh=True) + p = dlt.pipeline("sink_test", destination=test_sink, dev_mode=True) p.run([{"id": 1, "value": "1"}], table_name="some_table") assert found_dlt_column != remove_stuff @@ -608,7 +606,7 @@ def nesting_sink(items, table): def source(): yield dlt.resource(data, name="data") - p = dlt.pipeline("sink_test_max_nesting", destination=nesting_sink, full_refresh=True) + p = dlt.pipeline("sink_test_max_nesting", destination=nesting_sink, dev_mode=True) p.run(source()) # fall back to source setting diff --git a/tests/load/athena_iceberg/test_athena_adapter.py b/tests/load/athena_iceberg/test_athena_adapter.py index d14c0bdd34..19c176a374 100644 --- a/tests/load/athena_iceberg/test_athena_adapter.py +++ b/tests/load/athena_iceberg/test_athena_adapter.py @@ -40,7 +40,7 @@ def not_partitioned_table(): "athena_test", destination="athena", staging=filesystem("s3://not-a-real-bucket"), - full_refresh=True, + dev_mode=True, ) pipeline.extract([partitioned_table, not_partitioned_table]) diff --git a/tests/load/bigquery/test_bigquery_streaming_insert.py b/tests/load/bigquery/test_bigquery_streaming_insert.py index 391bf4095e..4a355d65eb 100644 --- a/tests/load/bigquery/test_bigquery_streaming_insert.py +++ b/tests/load/bigquery/test_bigquery_streaming_insert.py @@ -12,7 +12,7 @@ def test_resource(): bigquery_adapter(test_resource, insert_api="streaming") - pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", full_refresh=True) + pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", dev_mode=True) pack = pipe.run(test_resource, table_name="test_streaming_items44") assert_load_info(pack) @@ -54,7 +54,7 @@ def test_resource(): bigquery_adapter(test_resource, insert_api="streaming") - pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", full_refresh=True) + pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", dev_mode=True) pack = pipe.run(test_resource, table_name="test_streaming_items") assert_load_info(pack) diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index bae4ed9b59..66ea4a319f 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -952,7 +952,7 @@ def sources() -> List[DltResource]: pipeline = destination_config.setup_pipeline( f"bigquery_{uniq_id()}", - full_refresh=True, + dev_mode=True, ) pipeline.run(sources()) diff --git a/tests/load/clickhouse/test_clickhouse_adapter.py b/tests/load/clickhouse/test_clickhouse_adapter.py index 36d3ac07f7..ea3116c25b 100644 --- a/tests/load/clickhouse/test_clickhouse_adapter.py +++ b/tests/load/clickhouse/test_clickhouse_adapter.py @@ -19,7 +19,7 @@ def not_annotated_resource(): clickhouse_adapter(merge_tree_resource, table_engine_type="merge_tree") 
clickhouse_adapter(replicated_merge_tree_resource, table_engine_type="replicated_merge_tree") - pipe = dlt.pipeline(pipeline_name="adapter_test", destination="clickhouse", full_refresh=True) + pipe = dlt.pipeline(pipeline_name="adapter_test", destination="clickhouse", dev_mode=True) pack = pipe.run([merge_tree_resource, replicated_merge_tree_resource, not_annotated_resource]) assert_load_info(pack) diff --git a/tests/load/clickhouse/test_clickhouse_gcs_s3_compatibility.py b/tests/load/clickhouse/test_clickhouse_gcs_s3_compatibility.py index 481cd420c6..b2edb12d49 100644 --- a/tests/load/clickhouse/test_clickhouse_gcs_s3_compatibility.py +++ b/tests/load/clickhouse/test_clickhouse_gcs_s3_compatibility.py @@ -22,7 +22,7 @@ def dummy_data() -> Generator[Dict[str, int], None, None]: pipeline_name="gcs_s3_compatibility", destination="clickhouse", staging=gcp_bucket, - full_refresh=True, + dev_mode=True, ) pack = pipe.run([dummy_data]) assert_load_info(pack) diff --git a/tests/load/pipeline/test_athena.py b/tests/load/pipeline/test_athena.py index 921d8a083e..3197a19d14 100644 --- a/tests/load/pipeline/test_athena.py +++ b/tests/load/pipeline/test_athena.py @@ -208,7 +208,7 @@ def my_source() -> Any: @pytest.mark.parametrize("layout", TEST_FILE_LAYOUTS) def test_athena_file_layouts(destination_config: DestinationTestConfiguration, layout) -> None: # test wether strange file layouts still work in all staging configs - pipeline = destination_config.setup_pipeline("athena_file_layout", full_refresh=True) + pipeline = destination_config.setup_pipeline("athena_file_layout", dev_mode=True) os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout resources = [ @@ -242,7 +242,7 @@ def test_athena_file_layouts(destination_config: DestinationTestConfiguration, l ) def test_athena_partitioned_iceberg_table(destination_config: DestinationTestConfiguration): """Load an iceberg table with partition hints and verifiy partitions are created correctly.""" - pipeline = destination_config.setup_pipeline("athena_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("athena_" + uniq_id(), dev_mode=True) data_items = [ (1, "A", datetime.date.fromisoformat("2021-01-01")), diff --git a/tests/load/pipeline/test_clickhouse.py b/tests/load/pipeline/test_clickhouse.py index b4ccfe3581..8ad3a7f1a7 100644 --- a/tests/load/pipeline/test_clickhouse.py +++ b/tests/load/pipeline/test_clickhouse.py @@ -15,7 +15,7 @@ ids=lambda x: x.name, ) def test_clickhouse_destination_append(destination_config: DestinationTestConfiguration) -> None: - pipeline = destination_config.setup_pipeline(f"clickhouse_{uniq_id()}", full_refresh=True) + pipeline = destination_config.setup_pipeline(f"clickhouse_{uniq_id()}", dev_mode=True) try: diff --git a/tests/load/pipeline/test_dremio.py b/tests/load/pipeline/test_dremio.py index 9a4c96c922..66d1b0be4f 100644 --- a/tests/load/pipeline/test_dremio.py +++ b/tests/load/pipeline/test_dremio.py @@ -12,9 +12,7 @@ ids=lambda x: x.name, ) def test_dremio(destination_config: DestinationTestConfiguration) -> None: - pipeline = destination_config.setup_pipeline( - "dremio-test", dataset_name="bar", full_refresh=True - ) + pipeline = destination_config.setup_pipeline("dremio-test", dataset_name="bar", dev_mode=True) @dlt.resource(name="items", write_disposition="replace") def items() -> Iterator[Any]: diff --git a/tests/load/pipeline/test_drop.py b/tests/load/pipeline/test_drop.py index 7d5f175bb5..e1c6ec9d79 100644 --- a/tests/load/pipeline/test_drop.py +++ 
b/tests/load/pipeline/test_drop.py @@ -179,7 +179,7 @@ def test_drop_command_only_state(destination_config: DestinationTestConfiguratio def test_drop_command_only_tables(destination_config: DestinationTestConfiguration) -> None: """Test drop only tables and makes sure that schema and state are synced""" source = droppable_source() - pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), dev_mode=True) pipeline.run(source) sources_state = pipeline.state["sources"] diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 74a7fdaf99..94a29523e4 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -35,7 +35,7 @@ @pytest.fixture def local_filesystem_pipeline() -> dlt.Pipeline: os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "_storage" - return dlt.pipeline(pipeline_name="fs_pipe", destination="filesystem", full_refresh=True) + return dlt.pipeline(pipeline_name="fs_pipe", destination="filesystem", dev_mode=True) def test_pipeline_merge_write_disposition(default_buckets_env: str) -> None: diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index ffebb21797..2c1d1346f1 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -992,7 +992,7 @@ def test_invalid_merge_strategy(destination_config: DestinationTestConfiguration def r(): yield {"foo": "bar"} - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) with pytest.raises(PipelineStepFailed) as pip_ex: p.run(r()) assert isinstance(pip_ex.value.__context__, SchemaException) diff --git a/tests/load/pipeline/test_parallelism.py b/tests/load/pipeline/test_parallelism.py index a1a09a4d6b..656357fb00 100644 --- a/tests/load/pipeline/test_parallelism.py +++ b/tests/load/pipeline/test_parallelism.py @@ -55,7 +55,7 @@ def t() -> TDataItems: yield {"num": i} # we load n items for 3 tables in one run - p = dlt.pipeline("sink_test", destination=test_sink, full_refresh=True) + p = dlt.pipeline("sink_test", destination=test_sink, dev_mode=True) p.run( [ dlt.resource(table_name="t1")(t), diff --git a/tests/load/pipeline/test_postgres.py b/tests/load/pipeline/test_postgres.py index 81b729eefa..a4001b7faa 100644 --- a/tests/load/pipeline/test_postgres.py +++ b/tests/load/pipeline/test_postgres.py @@ -30,7 +30,7 @@ def test_postgres_encoded_binary( blob_table = blob_table.to_pylist() print(blob_table) - pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("postgres_" + uniq_id(), dev_mode=True) load_info = pipeline.run(blob_table, table_name="table", loader_file_format="csv") assert_load_info(load_info) job = load_info.load_packages[0].jobs["completed_jobs"][0].file_path diff --git a/tests/load/pipeline/test_scd2.py b/tests/load/pipeline/test_scd2.py index 4cbe47e960..b33c5a2590 100644 --- a/tests/load/pipeline/test_scd2.py +++ b/tests/load/pipeline/test_scd2.py @@ -103,7 +103,7 @@ def test_core_functionality( validity_column_names: List[str], active_record_timestamp: Optional[pendulum.DateTime], ) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( 
table_name="dim_test", @@ -242,7 +242,7 @@ def r(data): ) @pytest.mark.parametrize("simple", [True, False]) def test_child_table(destination_config: DestinationTestConfiguration, simple: bool) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( table_name="dim_test", write_disposition={"disposition": "merge", "strategy": "scd2"} @@ -385,7 +385,7 @@ def r(data): ids=lambda x: x.name, ) def test_grandchild_table(destination_config: DestinationTestConfiguration) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( table_name="dim_test", write_disposition={"disposition": "merge", "strategy": "scd2"} @@ -478,7 +478,7 @@ def r(data): ids=lambda x: x.name, ) def test_validity_column_name_conflict(destination_config: DestinationTestConfiguration) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( table_name="dim_test", @@ -524,7 +524,7 @@ def test_active_record_timestamp( destination_config: DestinationTestConfiguration, active_record_timestamp: Optional[TAnyDateTime], ) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( table_name="dim_test", @@ -571,7 +571,7 @@ def _make_scd2_r(table_: Any) -> DltResource: }, ).add_map(add_row_hash_to_table("row_hash")) - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) info = p.run(_make_scd2_r(table), loader_file_format=destination_config.file_format) assert_load_info(info) # make sure we have scd2 columns in schema @@ -607,7 +607,7 @@ def _make_scd2_r(table_: Any) -> DltResource: ids=lambda x: x.name, ) def test_user_provided_row_hash(destination_config: DestinationTestConfiguration) -> None: - p = destination_config.setup_pipeline("abstract", full_refresh=True) + p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( table_name="dim_test", diff --git a/tests/load/utils.py b/tests/load/utils.py index 0b9026378b..8c6446b921 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -152,7 +152,7 @@ def factory_kwargs(self) -> Dict[str, Any]: "stage_name", "staging_iam_role", "staging_use_msi", - "stage_name", + "force_iceberg", ] if getattr(self, k, None) is not None } @@ -174,8 +174,8 @@ def setup_pipeline( self.setup() pipeline = dlt.pipeline( pipeline_name=pipeline_name, - destination=self.destination, - staging=self.staging, + destination=kwargs.pop("destination", self.destination), + staging=kwargs.pop("staging", self.staging), dataset_name=dataset_name or pipeline_name, dev_mode=dev_mode, **kwargs, diff --git a/tests/normalize/test_max_nesting.py b/tests/normalize/test_max_nesting.py index 4015836232..5def1617dc 100644 --- a/tests/normalize/test_max_nesting.py +++ b/tests/normalize/test_max_nesting.py @@ -62,7 +62,7 @@ def bot_events(): pipeline = dlt.pipeline( pipeline_name=pipeline_name, destination=dummy(timeout=0.1), - full_refresh=True, + dev_mode=True, ) pipeline.run(bot_events) @@ -169,7 +169,7 @@ def some_data(): pipeline = dlt.pipeline( pipeline_name=pipeline_name, destination=dummy(timeout=0.1), - full_refresh=True, + dev_mode=True, ) pipeline.run(some_data(), write_disposition="append") diff --git 
a/tests/pipeline/test_import_export_schema.py b/tests/pipeline/test_import_export_schema.py index 6f40e1d1eb..eb36d36ba3 100644 --- a/tests/pipeline/test_import_export_schema.py +++ b/tests/pipeline/test_import_export_schema.py @@ -117,7 +117,7 @@ def test_import_schema_is_respected() -> None: destination=dummy(completed_prob=1), import_schema_path=IMPORT_SCHEMA_PATH, export_schema_path=EXPORT_SCHEMA_PATH, - full_refresh=True, + dev_mode=True, ) p.extract(EXAMPLE_DATA, table_name="person") # starts with import schema v 1 that is dirty -> 2 @@ -153,7 +153,7 @@ def resource(): destination=dummy(completed_prob=1), import_schema_path=IMPORT_SCHEMA_PATH, export_schema_path=EXPORT_SCHEMA_PATH, - full_refresh=True, + dev_mode=True, ) p.run(source()) diff --git a/tests/pipeline/utils.py b/tests/pipeline/utils.py index 3b4ae33445..c10618a7cc 100644 --- a/tests/pipeline/utils.py +++ b/tests/pipeline/utils.py @@ -52,7 +52,7 @@ def peacock(): @dlt.resource(name="🦚WidePeacock", selected=False) def wide_peacock(): - yield [{"peacock": [1, 2, 3]}] + yield [{"Peacock": [1, 2, 3]}] return budget, schedule, peacock, wide_peacock From f098e5aaceaca278ae10e661da612bd319b8c2ee Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 15:00:49 +0200 Subject: [PATCH 101/105] improves csv docs --- .../docs/dlt-ecosystem/destinations/snowflake.md | 13 +++++++++++++ .../website/docs/dlt-ecosystem/file-formats/csv.md | 14 +++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md index 7797298bdc..b92d242c8a 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md +++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md @@ -152,6 +152,19 @@ When staging is enabled: When loading from `parquet`, Snowflake will store `complex` types (JSON) in `VARIANT` as a string. Use the `jsonl` format instead or use `PARSE_JSON` to update the `VARIANT` field after loading. ::: +### Custom csv formats +By default we support csv format [produced by our writers](../file-formats/csv.md#default-settings) which is comma delimited, with header and optionally quoted. + +You can configure your own formatting ie. when [importing](../../general-usage/resource.md#import-external-files) external `csv` files. +```toml +[destination.snowflake.csv_format] +delimiter="|" +include_header=false +on_error_continue=true +``` +Which will read, `|` delimited file, without header and will continue on errors. + +Note that we ignore missing columns `ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE` and we will insert NULL into them. 
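For readers who prefer configuring this in code rather than in `config.toml`, the snippet below is a minimal sketch that mirrors `tests/load/pipeline/test_csv_loading.py` from this patch series; the import path for `CsvFormatConfiguration` is an assumption and may differ between `dlt` versions.

```py
# minimal sketch: apply a custom csv format programmatically (import path assumed)
import dlt
from dlt.common.data_writers.configuration import CsvFormatConfiguration

# pipe-delimited files without a header row; on_error_continue is also available
csv_format = CsvFormatConfiguration(delimiter="|", include_header=False)

pipeline = dlt.pipeline("my_pipeline", destination="snowflake")
# the format is picked up from config_params when the destination client is created
pipeline.destination.config_params["csv_format"] = csv_format
```

Setting `config_params` defers the format to the moment the destination client is instantiated, which is why the tests apply it before calling `pipeline.run`.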
## Supported column hints Snowflake supports the following [column hints](https://dlthub.com/docs/general-usage/schema#tables-and-columns): diff --git a/docs/website/docs/dlt-ecosystem/file-formats/csv.md b/docs/website/docs/dlt-ecosystem/file-formats/csv.md index 052fc7195e..02a7e81def 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/csv.md +++ b/docs/website/docs/dlt-ecosystem/file-formats/csv.md @@ -28,11 +28,23 @@ info = pipeline.run(some_source(), loader_file_format="csv") `dlt` attempts to make both writers to generate similarly looking files * separators are commas * quotes are **"** and are escaped as **""** -* `NULL` values are empty strings +* `NULL` values both are empty strings and empty tokens as in the example below * UNIX new lines are used * dates are represented as ISO 8601 * quoting style is "when needed" +Example of NULLs: +```sh +text1,text2,text3 +A,B,C +A,,"" +``` + +In the last row both `text2` and `text3` values are NULL. Python `csv` writer +is not able to write unquoted `None` values so we had to settle for `""` + +Note: all destinations capable of writing csvs must support it. + ### Change settings You can change basic **csv** settings, this may be handy when working with **filesystem** destination. Other destinations are tested with standard settings: From 1521778fbfbfb55bb006b26b2967fc699a38e300 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 19:56:43 +0200 Subject: [PATCH 102/105] fixes encodings in fsspec --- dlt/destinations/fs_client.py | 2 +- dlt/destinations/impl/filesystem/filesystem.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dlt/destinations/fs_client.py b/dlt/destinations/fs_client.py index 22b055b5dc..3233446594 100644 --- a/dlt/destinations/fs_client.py +++ b/dlt/destinations/fs_client.py @@ -39,7 +39,7 @@ def read_bytes(self, path: str, start: Any = None, end: Any = None, **kwargs: An def read_text( self, path: str, - encoding: Any = None, + encoding: Any = "utf-8", errors: Any = None, newline: Any = None, compression: str = None, diff --git a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 6757399e98..00b990d4fa 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -368,7 +368,7 @@ def _write_to_json_file(self, filepath: str, data: DictStrAny) -> None: dirname = self.pathlib.dirname(filepath) if not self.fs_client.isdir(dirname): return - self.fs_client.write_text(filepath, json.dumps(data), "utf-8") + self.fs_client.write_text(filepath, json.dumps(data), encoding="utf-8") def _to_path_safe_string(self, s: str) -> str: """for base64 strings""" @@ -450,7 +450,9 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: # Load compressed state from destination if selected_path: - state_json: TPipelineStateDoc = json.loads(self.fs_client.read_text(selected_path)) + state_json: TPipelineStateDoc = json.loads( + self.fs_client.read_text(selected_path, encoding="utf-8") + ) # we had dlt_load_id stored until version 0.5 and since we do not have any version control # we always migrate if load_id := state_json.pop("dlt_load_id", None): # type: ignore[typeddict-item] @@ -497,7 +499,9 @@ def _get_stored_schema_by_hash_or_newest( break if selected_path: - return StorageSchemaInfo(**json.loads(self.fs_client.read_text(selected_path))) + return StorageSchemaInfo( + **json.loads(self.fs_client.read_text(selected_path, encoding="utf-8")) + ) return None From 
796483e061d93a56723f3a7bad2095f6c0d30db7 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 19:57:12 +0200 Subject: [PATCH 103/105] improves naming convention docs --- docs/technical/general_usage.md | 2 +- .../docs/dlt-ecosystem/destinations/duckdb.md | 2 +- .../website/docs/general-usage/destination.md | 6 +- .../docs/general-usage/naming-convention.md | 99 ++++++++++++++----- 4 files changed, 79 insertions(+), 30 deletions(-) diff --git a/docs/technical/general_usage.md b/docs/technical/general_usage.md index 7ce8a91b46..336c892c66 100644 --- a/docs/technical/general_usage.md +++ b/docs/technical/general_usage.md @@ -90,7 +90,7 @@ p.extract([label1, label2, label3], name="labels") # will use default schema "s **By default, one dataset can handle multiple schemas**. The pipeline configuration option `use_single_dataset` controls the dataset layout in the destination. By default it is set to True. In that case only one dataset is created at the destination - by default dataset name which is the same as pipeline name. The dataset name can also be explicitly provided into `dlt.pipeline` `dlt.run` and `Pipeline::load` methods. -All the tables from all the schemas are stored in that dataset. The table names are **not prefixed** with schema names!. If there are any name clashes, tables in the destination will be unions of the fields of all the tables with same name in the schemas. +All the tables from all the schemas are stored in that dataset. The table names are **not prefixed** with schema names!. If there are any name collisions, tables in the destination will be unions of the fields of all the tables with same name in the schemas. **Enabling one dataset per schema layout** If you set `use_single_dataset` to False: diff --git a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md index d6ec36ae49..1e3d6b8403 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md +++ b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md @@ -51,7 +51,7 @@ or via the env variable `SCHEMA__NAMING` or directly in the code: dlt.config["schema.naming"] = "duck_case" ``` :::caution -**duckdb** identifiers are **case insensitive** but display names preserve case. This may create name clashes if, for example, you load JSON with +**duckdb** identifiers are **case insensitive** but display names preserve case. This may create name collisions if, for example, you load JSON with `{"Column": 1, "column": 2}` as it will map data to a single column. ::: diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index e30cb41a60..b30403d349 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -154,7 +154,7 @@ snow_ = dlt.destinations.snowflake(naming_convention="sql_cs_v1") Setting naming convention will impact all new schemas being created (ie. on first pipeline run) and will re-normalize all existing identifiers. :::caution -`dlt` prevents re-normalization of identifiers in tables that were already created at the destination. Use [refresh](pipeline.md#refresh-pipeline-data-and-state) mode to drop the data. You can also disable this behavior via [configuration](naming-convention.md#avoid-identifier-clashes) +`dlt` prevents re-normalization of identifiers in tables that were already created at the destination. Use [refresh](pipeline.md#refresh-pipeline-data-and-state) mode to drop the data. 
You can also disable this behavior via [configuration](naming-convention.md#avoid-identifier-collisions) ::: :::note @@ -163,7 +163,7 @@ Destinations that support case sensitive identifiers but use case folding conven :::caution If you use case sensitive naming convention with case insensitive destination, `dlt` will: -1. Fail the load if it detects identifier clash due to case folding +1. Fail the load if it detects identifier collision due to case folding 2. Warn if any case folding is applied by the destination. ::: @@ -173,7 +173,7 @@ Selected destinations may be configured so they start accepting case sensitive i from dlt.destinations import mssql dest_ = mssql(has_case_sensitive_identifiers=True, naming_convention="sql_cs_v1") ``` -Above we can safely use case sensitive naming convention without worrying of name clashes. +Above we can safely use case sensitive naming convention without worrying of name collisions. You can configure the case sensitivity, **but configuring destination capabilities is not currently supported**. ```toml diff --git a/docs/website/docs/general-usage/naming-convention.md b/docs/website/docs/general-usage/naming-convention.md index 7802e80985..c24b6c4869 100644 --- a/docs/website/docs/general-usage/naming-convention.md +++ b/docs/website/docs/general-usage/naming-convention.md @@ -5,32 +5,23 @@ keywords: [identifiers, snake case, case sensitive, case insensitive, naming] --- # Naming Convention -`dlt` creates tables, child tables and column identifiers from the data. The data source, -typically JSON documents, contains identifiers (i.e. key names in a dictionary) with any Unicode -characters, any lengths and naming styles. On the other hand destinations accept very strict -namespaces for their identifiers. Like [Redshift](../dlt-ecosystem/destinations/redshift.md#naming-convention) that accepts case-insensitive alphanumeric -identifiers with maximum 127 characters. +`dlt` creates table and column identifiers from the data. The data source that ie. a stream of JSON documents may have identifiers (i.e. key names in a dictionary) with any Unicode characters, of any length and naming style. On the other hand, destinations require that you follow strict rules when you name tables, columns or collections. +A good example is [Redshift](../dlt-ecosystem/destinations/redshift.md#naming-convention) that accepts case-insensitive alphanumeric identifiers with maximum 127 characters. -`dlt` groups tables belonging to [resources](resource.md) from a single [source](source.md) in a [schema](schema.md). +`dlt` groups tables from a single [source](source.md) in a [schema](schema.md). -Each schema contains **naming convention** that tells `dlt` how to translate identifiers to the -namespace that the destination understands. Naming conventions are in essence functions translating strings from the source identifier format into destination identifier format. For example our **snake_case** (default) naming convention will translate `DealFlow` into `deal_flow` identifier. +Each schema defines **naming convention** that tells `dlt` how to translate identifiers to the +namespace that the destination understands. Naming conventions are in essence functions that map strings from the source identifier format into destination identifier format. For example our **snake_case** (default) naming convention will translate `DealFlow` into `deal_flow` identifier. -You have control over which naming convention to use and dlt provides a few to choose from ie. 
`sql_cs_v1` +You can pick which naming convention to use. `dlt` provides a few to [choose from](#available-naming-conventions) or you can [easily add your own](#write-your-own-naming-convention). +:::tip +* Standard behavior of `dlt` is to **use the same naming convention for all destinations** so users see always the same tables and column names in their databases. +* Use simple, short small caps identifiers for everything so no normalization is needed +::: - -* Each destination has a preferred naming convention. -* This naming convention is used when new schemas are created. -* Schemas preserve naming convention when saved -* `dlt` applies final naming convention in `normalize` stage. Naming convention comes from (1) explicit configuration (2) from destination capabilities. Naming convention -in schema will be ignored. -* You can change the naming convention in the capabilities: (name, case-folding, case sensitivity) - -## Case sensitivity - - -## Default naming convention (snake_case) +### Use default naming convention (snake_case) +`dlt` most used and tested with default, case insensitive, lower case naming convention called **snake_case** 1. Converts identifiers to **snake_case**, small caps. Removes all ascii characters except ascii alphanumerics and underscores. @@ -49,16 +40,74 @@ in schema will be ignored. > 💡 Use simple, short small caps identifiers for everything! -## Set and adjust naming convention explicitly +:::tip +If you do not like **snake_case** your next safe option is **sql_ci** which generates SQL-safe, lower-case, case-insensitive identifiers without any +other transformations. To permanently change the default naming convention on a given machine: +1. set an environment variable `SCHEMA__NAMING` to `sql_ci_v1` OR +2. add the following line to your global `config.toml` (the one in your home dir ie. `~/.dlt/config.toml`) +```toml +[schema] +naming="sql_ci_v1" +``` +::: + +## Source identifiers vs destination identifiers +### Pick the right identifier form when defining resources +`dlt` keeps source (not normalized) identifiers during data [extraction](../reference/explainers/how-dlt-works.md#extract) and translates them during [normalization](../reference/explainers/how-dlt-works.md#normalize). For you it means: +1. If you write a [transformer](resource.md#process-resources-with-dlttransformer) or a [mapping/filtering function](resource.md#filter-transform-and-pivot-data), you will see the original data, without any normalization. Use the source key names to access the dicts! +2. If you define a `primary_key` or `cursor` that participate in [incremental loading](incremental-loading.md#incremental-loading-with-a-cursor-field) use the source identifiers (as `dlt` will inspect the source data). +3. When defining any other hints ie. `columns` or `merge_key` you can pick source or destination identifiers. `dlt` normalizes all hints together with your data. +4. `Schema` object (ie. obtained from the pipeline or from `dlt` source via `discover_schema`) **always contains destination (normalized) identifiers**. + +In the snippet below, we define a resource with various "illegal" unicode characters in table name and other hint and demonstrate how they get normalized in the schema object. +```py +``` + +### Understand the identifier normalization +Identifiers are translated from source to destination form in **normalize** step. Here's how `dlt` picks the right naming convention: -## Configure naming convention +* Each destination has a preferred naming convention. 
+* This naming convention is used when new schemas are created. +* Schemas preserve naming convention when saved +* `dlt` applies final naming convention in `normalize` step. Naming convention comes from (1) explicit configuration (2) from destination capabilities. Naming convention +in schema will be ignored. +* You can change the naming convention in the capabilities: (name, case-folding, case sensitivity) + +### Case sensitive and insensitive destinations +Naming conventions come in two types. +* **case sensitive** naming convention normalize source identifiers into case sensitive identifiers where character +* **case insensitive** + +Case sensitive naming convention will put a destination in [case sensitive mode](destination.md#control-how-dlt-creates-table-column-and-other-identifiers). Identifiers that +differ only in casing will not [collide](#avoid-identifier-collisions). Note that many destinations are exclusively case insensitive, of which some preserve casing of identifiers (ie. **duckdb**) and some will case-fold identifiers when creating tables (ie. **Redshift**, **Athena** do lower case on the names). + +## Identifier shortening +Identifier shortening happens during normalization. `dlt` takes the maximum length of the identifier from the destination capabilities and will trim the identifiers that are +too long. The default shortening behavior generates short deterministic hashes of the source identifiers and places them in the middle of the destination identifier. This +(with a high probability) avoids shortened identifier collisions. + + +## Pick your own naming convention +### Configure naming convention The naming convention is configurable and users can easily create their own conventions that i.e. pass all the identifiers unchanged if the destination accepts that (i.e. DuckDB). -## Avoid identifier clashes -## Available naming conventions +### Available naming conventions + +### Set and adjust naming convention explicitly + +## Avoid identifier collisions + + +`dlt` detects various types of collisions and ignores the others. + ## Write your own naming convention +Naming conventions reside in separate Python modules, are classes with `NamingConvention` name and must derive from `BaseNamingConvention`. We include two examples of +naming conventions that you may find useful + +1. A variant of `sql_ci` that generates identifier collisions with a low (user defined) probability by appending a deterministic tag to each name. +2. A variant of `sql_cs` that allows for LATIN-2 (ie. 
umlaut) characters From 534c7f85666b3c08d37c8b801bf98ef8835aa667 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 19:57:31 +0200 Subject: [PATCH 104/105] fixes tests and renames clash to collision --- dlt/common/destination/utils.py | 22 ++++++------ dlt/common/libs/pyarrow.py | 8 ++--- dlt/common/normalizers/naming/snake_case.py | 2 +- dlt/common/schema/exceptions.py | 8 ++--- dlt/common/schema/utils.py | 6 ++-- dlt/common/utils.py | 36 ------------------- dlt/destinations/job_client_impl.py | 4 +-- tests/extract/test_decorators.py | 35 ++++++++++++++++++ tests/libs/pyarrow/test_pyarrow_normalizer.py | 4 +-- .../test_bigquery_streaming_insert.py | 4 ++- tests/load/pipeline/test_duckdb.py | 6 ++-- .../load/pipeline/test_filesystem_pipeline.py | 6 ++-- tests/load/weaviate/test_pipeline.py | 4 +-- tests/load/weaviate/test_weaviate_client.py | 6 ++-- tests/pipeline/test_arrow_sources.py | 8 +++-- tests/pipeline/test_dlt_versions.py | 9 ++--- tests/pipeline/test_pipeline.py | 18 ++++++++-- 17 files changed, 103 insertions(+), 83 deletions(-) diff --git a/dlt/common/destination/utils.py b/dlt/common/destination/utils.py index 8b48d9f394..2c5e97df14 100644 --- a/dlt/common/destination/utils.py +++ b/dlt/common/destination/utils.py @@ -4,10 +4,8 @@ from dlt.common.destination.exceptions import IdentifierTooLongException from dlt.common.schema import Schema from dlt.common.schema.exceptions import ( - SchemaCorruptedException, - SchemaIdentifierNormalizationClash, + SchemaIdentifierNormalizationCollision, ) -from dlt.common.schema.exceptions import SchemaException from dlt.common.schema.utils import is_complete_column from dlt.common.typing import DictStrStr @@ -24,7 +22,7 @@ def verify_schema_capabilities( It will log warnings by default. It is up to the caller to eventually raise exception * Checks all table and column name lengths against destination capabilities and raises on too long identifiers - * Checks if schema has clashes due to case sensitivity of the identifiers + * Checks if schema has collisions due to case sensitivity of the identifiers """ log = logger.warning if warnings else logger.info @@ -35,17 +33,17 @@ def verify_schema_capabilities( (str if capabilities.has_case_sensitive_identifiers else str.casefold)(ident) # type: ignore ) table_name_lookup: DictStrStr = {} - # name clash explanation - clash_msg = "Destination is case " + ( + # name collision explanation + collision_msg = "Destination is case " + ( "sensitive" if capabilities.has_case_sensitive_identifiers else "insensitive" ) if capabilities.casefold_identifier is not str: - clash_msg += ( + collision_msg += ( f" but it uses {capabilities.casefold_identifier} to generate case insensitive" " identifiers. You may try to change the destination capabilities by changing the" " `casefold_identifier` to `str`" ) - clash_msg += ( + collision_msg += ( ". Please clean up your data before loading so the entities have different name. You can" " also change to case insensitive naming convention. Note that in that case data from both" " columns will be merged into one." 
@@ -59,14 +57,14 @@ def verify_schema_capabilities( if cased_table_name in table_name_lookup: conflict_table_name = table_name_lookup[cased_table_name] exception_log.append( - SchemaIdentifierNormalizationClash( + SchemaIdentifierNormalizationCollision( schema.name, table_name, "table", table_name, conflict_table_name, schema.naming.name(), - clash_msg, + collision_msg, ) ) table_name_lookup[cased_table_name] = table_name @@ -87,14 +85,14 @@ def verify_schema_capabilities( if cased_column_name in column_name_lookup: conflict_column_name = column_name_lookup[cased_column_name] exception_log.append( - SchemaIdentifierNormalizationClash( + SchemaIdentifierNormalizationCollision( schema.name, table_name, "column", column_name, conflict_column_name, schema.naming.name(), - clash_msg, + collision_msg, ) ) column_name_lookup[cased_column_name] = column_name diff --git a/dlt/common/libs/pyarrow.py b/dlt/common/libs/pyarrow.py index 8a6dc68078..ee249b111c 100644 --- a/dlt/common/libs/pyarrow.py +++ b/dlt/common/libs/pyarrow.py @@ -348,13 +348,13 @@ def normalize_py_arrow_item( def get_normalized_arrow_fields_mapping(schema: pyarrow.Schema, naming: NamingConvention) -> StrStr: - """Normalizes schema field names and returns mapping from original to normalized name. Raises on name clashes""" + """Normalizes schema field names and returns mapping from original to normalized name. Raises on name collisions""" norm_f = naming.normalize_identifier name_mapping = {n.name: norm_f(n.name) for n in schema} # verify if names uniquely normalize normalized_names = set(name_mapping.values()) if len(name_mapping) != len(normalized_names): - raise NameNormalizationClash( + raise NameNormalizationCollision( f"Arrow schema fields normalized from {list(name_mapping.keys())} to" f" {list(normalized_names)}" ) @@ -497,7 +497,7 @@ def cast_arrow_schema_types( return schema -class NameNormalizationClash(ValueError): +class NameNormalizationCollision(ValueError): def __init__(self, reason: str) -> None: - msg = f"Arrow column name clash after input data normalization. {reason}" + msg = f"Arrow column name collision after input data normalization. 
{reason}" super().__init__(msg) diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py index f4aa1c25e2..7ff9259745 100644 --- a/dlt/common/normalizers/naming/snake_case.py +++ b/dlt/common/normalizers/naming/snake_case.py @@ -66,5 +66,5 @@ def _to_snake_case(cls, identifier: str) -> str: stripped_ident += "x" * strip_count # identifier = cls._RE_ENDING_UNDERSCORES.sub("x", identifier) - # replace consecutive underscores with single one to prevent name clashes with PATH_SEPARATOR + # replace consecutive underscores with single one to prevent name collisions with PATH_SEPARATOR return cls._RE_UNDERSCORES.sub("_", stripped_ident) diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 52b34e5959..2f016577ce 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -102,7 +102,7 @@ class SchemaCorruptedException(SchemaException): pass -class SchemaIdentifierNormalizationClash(SchemaCorruptedException): +class SchemaIdentifierNormalizationCollision(SchemaCorruptedException): def __init__( self, schema_name: str, @@ -111,17 +111,17 @@ def __init__( identifier_name: str, conflict_identifier_name: str, naming_name: str, - clash_msg: str, + collision_msg: str, ) -> None: if identifier_type == "column": table_info = f"in table {table_name} " else: table_info = "" msg = ( - f"A {identifier_type} name {identifier_name} {table_info}clashes with" + f"A {identifier_type} name {identifier_name} {table_info}collides with" f" {conflict_identifier_name} after normalization with {naming_name} naming" " convention. " - + clash_msg + + collision_msg ) self.table_name = table_name self.identifier_type = identifier_type diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 7ca9265886..f5765be351 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -510,7 +510,7 @@ def normalize_table_identifiers(table: TTableSchema, naming: NamingConvention) - """Normalizes all table and column names in `table` schema according to current schema naming convention and returns new instance with modified table schema. - Naming convention like snake_case may produce name clashes with the column names. Clashing column schemas are merged + Naming convention like snake_case may produce name collisions with the column names. Colliding column schemas are merged where the column that is defined later in the dictionary overrides earlier column. Note that resource name is not normalized. @@ -529,14 +529,14 @@ def normalize_table_identifiers(table: TTableSchema, naming: NamingConvention) - origin_c_name = c["name"] new_col_name = c["name"] = naming.normalize_path(c["name"]) # re-index columns as the name changed, if name space was reduced then - # some columns now clash with each other. so make sure that we merge columns that are already there + # some columns now collide with each other. so make sure that we merge columns that are already there if new_col_name in new_columns: new_columns[new_col_name] = merge_column( new_columns[new_col_name], c, merge_defaults=False ) logger.warning( f"In schema {naming} column {origin_c_name} got normalized into" - f" {new_col_name} which clashes with other column. Both columns got merged" + f" {new_col_name} which collides with other column. Both columns got merged" " into one." 
) else: diff --git a/dlt/common/utils.py b/dlt/common/utils.py index 980385aac4..8e89556c39 100644 --- a/dlt/common/utils.py +++ b/dlt/common/utils.py @@ -142,42 +142,6 @@ def flatten_list_of_str_or_dicts(seq: Sequence[Union[StrAny, str]]) -> DictStrAn return o -# def flatten_dicts_of_dicts(dicts: Mapping[str, Any]) -> Sequence[Any]: -# """ -# Transform and object {K: {...}, L: {...}...} -> [{key:K, ....}, {key: L, ...}, ...] -# """ -# o: List[Any] = [] -# for k, v in dicts.items(): -# if isinstance(v, list): -# # if v is a list then add "key" to each list element -# for lv in v: -# lv["key"] = k -# else: -# # add as "key" to dict -# v["key"] = k - -# o.append(v) -# return o - - -# def tuplify_list_of_dicts(dicts: Sequence[DictStrAny]) -> Sequence[DictStrAny]: -# """ -# Transform list of dictionaries with single key into single dictionary of {"key": orig_key, "value": orig_value} -# """ -# for d in dicts: -# if len(d) > 1: -# raise ValueError(f"Tuplify requires one key dicts {d}") -# if len(d) == 1: -# key = next(iter(d)) -# # delete key first to avoid name clashes -# value = d[key] -# del d[key] -# d["key"] = key -# d["value"] = value - -# return dicts - - def flatten_list_or_items(_iter: Union[Iterable[TAny], Iterable[List[TAny]]]) -> Iterator[TAny]: for items in _iter: if isinstance(items, List): diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index c3e9e08406..0a627bbdfb 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -323,10 +323,10 @@ def get_storage_tables( name_lookup = { folded_name: name for folded_name, name in zip(folded_table_names, table_names) } - # this should never happen: we verify schema for name clashes before loading + # this should never happen: we verify schema for name collisions before loading assert len(name_lookup) == len(table_names), ( f"One or more of tables in {table_names} after applying" - f" {self.capabilities.casefold_identifier} produced a clashing name." + f" {self.capabilities.casefold_identifier} produced a name collision." 
) # rows = self.sql_client.execute_sql(query, *db_params) diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index 21e8ab7e92..f9775fd218 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -344,6 +344,41 @@ class Columns3(BaseModel): assert t["columns"]["b"]["data_type"] == "double" +def test_not_normalized_identifiers_in_hints() -> None: + @dlt.resource( + primary_key="ID", + merge_key=["Month", "Day"], + columns=[{"name": "Col1", "data_type": "bigint"}], + table_name="🐫Camels", + ) + def CamelResource(): + yield ["🐫"] * 10 + + camels = CamelResource() + # original names are kept + assert camels.name == "CamelResource" + assert camels.table_name == "🐫Camels" + assert camels.columns == {"Col1": {"data_type": "bigint", "name": "Col1"}} + table = camels.compute_table_schema() + columns = table["columns"] + assert "ID" in columns + assert "Month" in columns + assert "Day" in columns + assert "Col1" in columns + assert table["name"] == "🐫Camels" + + # define as part of a source + camel_source = DltSource(Schema("snake_case"), "camel_section", [camels]) + schema = camel_source.discover_schema() + # all normalized + table = schema.get_table("_camels") + columns = table["columns"] + assert "id" in columns + assert "month" in columns + assert "day" in columns + assert "col1" in columns + + def test_resource_name_from_generator() -> None: def some_data(): yield [1, 2, 3] diff --git a/tests/libs/pyarrow/test_pyarrow_normalizer.py b/tests/libs/pyarrow/test_pyarrow_normalizer.py index 7964ee5bb8..d975702ad8 100644 --- a/tests/libs/pyarrow/test_pyarrow_normalizer.py +++ b/tests/libs/pyarrow/test_pyarrow_normalizer.py @@ -3,7 +3,7 @@ import pyarrow as pa import pytest -from dlt.common.libs.pyarrow import normalize_py_arrow_item, NameNormalizationClash +from dlt.common.libs.pyarrow import normalize_py_arrow_item, NameNormalizationCollision from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers from dlt.common.schema.utils import new_column, TColumnSchema from dlt.common.destination import DestinationCapabilitiesContext @@ -65,7 +65,7 @@ def test_field_normalization_clash() -> None: {"col^New": "hello", "col_new": 1}, ] ) - with pytest.raises(NameNormalizationClash): + with pytest.raises(NameNormalizationCollision): _normalize(table, []) diff --git a/tests/load/bigquery/test_bigquery_streaming_insert.py b/tests/load/bigquery/test_bigquery_streaming_insert.py index 4a355d65eb..c950a46f91 100644 --- a/tests/load/bigquery/test_bigquery_streaming_insert.py +++ b/tests/load/bigquery/test_bigquery_streaming_insert.py @@ -41,10 +41,12 @@ def test_resource(): pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery") info = pipe.run(test_resource) + # pick the failed job + failed_job = info.load_packages[0].jobs["failed_jobs"][0] assert ( """BigQuery streaming insert can only be used with `append`""" """ write_disposition, while the given resource has `merge`.""" - ) in info.asdict()["load_packages"][0]["jobs"][0]["failed_message"] + ) in failed_job.failed_message def test_bigquery_streaming_nested_data(): diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index 80c6b861ee..3dcfffe348 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ -1,7 +1,7 @@ import pytest import os -from dlt.common.schema.exceptions import SchemaIdentifierNormalizationClash +from dlt.common.schema.exceptions import SchemaIdentifierNormalizationCollision from 
dlt.common.time import ensure_pendulum_datetime from dlt.destinations.exceptions import DatabaseTerminalException from dlt.pipeline.exceptions import PipelineStepFailed @@ -42,7 +42,7 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No "🦚Peacock__peacock": 3, "🦚Peacocks🦚": 1, "🦚WidePeacock": 1, - "🦚WidePeacock__peacock": 3, + "🦚WidePeacock__Peacock": 3, } # this will fail - duckdb preserves case but is case insensitive when comparing identifiers @@ -52,7 +52,7 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No table_name="🦚peacocks🦚", loader_file_format=destination_config.file_format, ) - assert isinstance(pip_ex.value.__context__, SchemaIdentifierNormalizationClash) + assert isinstance(pip_ex.value.__context__, SchemaIdentifierNormalizationCollision) assert pip_ex.value.__context__.conflict_identifier_name == "🦚Peacocks🦚" assert pip_ex.value.__context__.identifier_name == "🦚peacocks🦚" assert pip_ex.value.__context__.identifier_type == "table" diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 94a29523e4..210ad76b8a 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -798,13 +798,15 @@ def table_3(): # check opening of file values = [] - for line in fs_client.read_text(t1_files[0]).split("\n"): + for line in fs_client.read_text(t1_files[0], encoding="utf-8").split("\n"): if line: values.append(json.loads(line)["value"]) assert values == [1, 2, 3, 4, 5] # check binary read - assert fs_client.read_bytes(t1_files[0]) == str.encode(fs_client.read_text(t1_files[0])) + assert fs_client.read_bytes(t1_files[0]) == str.encode( + fs_client.read_text(t1_files[0], encoding="utf-8") + ) # check truncate fs_client.truncate_tables(["table_1"]) diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py index 45195e86bc..fc46d00d05 100644 --- a/tests/load/weaviate/test_pipeline.py +++ b/tests/load/weaviate/test_pipeline.py @@ -6,7 +6,7 @@ from dlt.common import json from dlt.common.schema.exceptions import ( SchemaCorruptedException, - SchemaIdentifierNormalizationClash, + SchemaIdentifierNormalizationCollision, ) from dlt.common.utils import uniq_id @@ -404,7 +404,7 @@ def test_vectorize_property_without_data() -> None: primary_key="vAlue", columns={"vAlue": {"data_type": "text"}}, ) - assert isinstance(pipe_ex.value.__context__, SchemaIdentifierNormalizationClash) + assert isinstance(pipe_ex.value.__context__, SchemaIdentifierNormalizationCollision) # set the naming convention to case insensitive os.environ["SCHEMA__NAMING"] = "dlt.destinations.impl.weaviate.ci_naming" diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 17c1a9828c..dc2110d2f6 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -5,7 +5,7 @@ from dlt.common.schema import Schema from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.schema.exceptions import SchemaIdentifierNormalizationClash +from dlt.common.schema.exceptions import SchemaIdentifierNormalizationCollision from dlt.common.utils import uniq_id from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns @@ -120,7 +120,7 @@ def test_case_sensitive_properties_create(client: WeaviateClient) -> None: ) ) 
client.schema._bump_version() - with pytest.raises(SchemaIdentifierNormalizationClash) as clash_ex: + with pytest.raises(SchemaIdentifierNormalizationCollision) as clash_ex: client.update_stored_schema() assert clash_ex.value.identifier_type == "column" assert clash_ex.value.identifier_name == "coL1" @@ -169,7 +169,7 @@ def test_case_sensitive_properties_add(client: WeaviateClient) -> None: ) ) client.schema._bump_version() - with pytest.raises(SchemaIdentifierNormalizationClash): + with pytest.raises(SchemaIdentifierNormalizationCollision): client.update_stored_schema() # _, table_columns = client.get_storage_table("ColClass") diff --git a/tests/pipeline/test_arrow_sources.py b/tests/pipeline/test_arrow_sources.py index bcaa19479c..4cdccb1e34 100644 --- a/tests/pipeline/test_arrow_sources.py +++ b/tests/pipeline/test_arrow_sources.py @@ -9,7 +9,11 @@ import dlt from dlt.common import json, Decimal from dlt.common.utils import uniq_id -from dlt.common.libs.pyarrow import NameNormalizationClash, remove_columns, normalize_py_arrow_item +from dlt.common.libs.pyarrow import ( + NameNormalizationCollision, + remove_columns, + normalize_py_arrow_item, +) from dlt.pipeline.exceptions import PipelineStepFailed @@ -223,7 +227,7 @@ def data_frames(): with pytest.raises(PipelineStepFailed) as py_ex: pipeline.extract(data_frames()) - assert isinstance(py_ex.value.__context__, NameNormalizationClash) + assert isinstance(py_ex.value.__context__, NameNormalizationCollision) @pytest.mark.parametrize("item_type", ["arrow-table", "arrow-batch"]) diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index b95d351844..ba7c0b9db8 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -1,3 +1,4 @@ +import sys from subprocess import CalledProcessError import pytest import tempfile @@ -26,8 +27,8 @@ from tests.pipeline.utils import load_table_counts from tests.utils import TEST_STORAGE_ROOT, test_storage -# if sys.version_info >= (3, 12): -# pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) +if sys.version_info >= (3, 12): + pytest.skip("Does not run on Python 3.12 and later", allow_module_level=True) GITHUB_PIPELINE_NAME = "dlt_github_pipeline" @@ -205,10 +206,10 @@ def test_filesystem_pipeline_with_dlt_update(test_storage: FileStorage) -> None: fs_client = pipeline._fs_client() state_files = sorted(fs_client.list_table_files("_dlt_pipeline_state")) # first file is in old format - state_1 = json.loads(fs_client.read_text(state_files[0])) + state_1 = json.loads(fs_client.read_text(state_files[0], encoding="utf-8")) assert "dlt_load_id" in state_1 # seconds is new - state_2 = json.loads(fs_client.read_text(state_files[1])) + state_2 = json.loads(fs_client.read_text(state_files[1], encoding="utf-8")) assert "_dlt_load_id" in state_2 diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index cfc41cb43b..95b97c7666 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -2272,14 +2272,14 @@ def test_data(): assert len(cur.fetchall()) == 3 -def test_change_naming_convention_name_clash() -> None: +def test_change_naming_convention_name_collision() -> None: duck_ = dlt.destinations.duckdb(naming_convention="duck_case", recommended_file_size=120000) caps = duck_.capabilities() assert caps.naming_convention == "duck_case" assert caps.recommended_file_size == 120000 # use duck case to load data into duckdb so casing and emoji are preserved - pipeline = 
dlt.pipeline("test_change_naming_convention_name_clash", destination=duck_) + pipeline = dlt.pipeline("test_change_naming_convention_name_collision", destination=duck_) info = pipeline.run( airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock") ) @@ -2320,6 +2320,20 @@ def test_change_naming_convention_name_clash() -> None: # } +def test_change_naming_convention_column_collision() -> None: + duck_ = dlt.destinations.duckdb(naming_convention="duck_case") + + data = {"Col": "A"} + pipeline = dlt.pipeline("test_change_naming_convention_column_collision", destination=duck_) + info = pipeline.run([data], table_name="data") + assert_load_info(info) + + os.environ["SCHEMA__NAMING"] = "sql_ci_v1" + with pytest.raises(PipelineStepFailed) as pip_ex: + pipeline.run([data], table_name="data") + assert isinstance(pip_ex.value.__cause__, TableIdentifiersFrozen) + + def test_import_jsonl_file() -> None: pipeline = dlt.pipeline( pipeline_name="test_jsonl_import", From 5f4cb4c780a74a35342937d04da6a5c2eb25038e Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Wed, 26 Jun 2024 22:18:52 +0200 Subject: [PATCH 105/105] fixes getting original bases from instance --- dlt/common/typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index a34f7941f5..fdd27161f7 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -401,7 +401,7 @@ def get_generic_type_argument_from_instance( if cls_ := getattr(instance, "__orig_class__", None): # instance of generic class pass - elif bases_ := get_original_bases(instance): + elif bases_ := get_original_bases(instance.__class__): # instance of class deriving from generic cls_ = bases_[0] if cls_: