diff --git a/metaphor/common/filter.py b/metaphor/common/filter.py index a0c83363..32376700 100644 --- a/metaphor/common/filter.py +++ b/metaphor/common/filter.py @@ -32,12 +32,45 @@ class DatasetFilter: def merge(self, filter: "DatasetFilter") -> "DatasetFilter": """Merge with another filter and return a shallow copy""" - def merge_filters(f1: Optional[DatabaseFilter], f2: Optional[DatabaseFilter]): - return f1 if f2 is None else f2 if f1 is None else {**f1, **f2} + def _merge_table_filters( + f1: Optional[TableFilter], f2: Optional[TableFilter] + ) -> Optional[TableFilter]: + return f1 if f2 is None else f2 if f1 is None else f1.union(f2) + + def _merge_schema_filters( + f1: Optional[SchemaFilter], f2: Optional[SchemaFilter] + ) -> Optional[SchemaFilter]: + if f1 is None: + return f2 + if f2 is None: + return f1 + + result = f1.copy() # shallow copy of f1 + for key, val in f2.items(): + result[key] = _merge_table_filters(f1.get(key, None), val) + + return result + + def _merge_database_filters( + f1: Optional[DatabaseFilter], f2: Optional[DatabaseFilter] + ) -> Optional[DatabaseFilter]: + """ + Merge two database filters, if same key, then merge the schema filters + """ + if f1 is None: + return f2 + if f2 is None: + return f1 + + result = f1.copy() # shallow copy of f1 + for key, val in f2.items(): + result[key] = _merge_schema_filters(f1.get(key, None), val) + + return result return DatasetFilter( - includes=merge_filters(self.includes, filter.includes), - excludes=merge_filters(self.excludes, filter.excludes), + includes=_merge_database_filters(self.includes, filter.includes), + excludes=_merge_database_filters(self.excludes, filter.excludes), ) def normalize(self) -> "DatasetFilter": diff --git a/poetry.lock b/poetry.lock index ee126692..14660764 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -575,17 +575,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.34.84" +version = "1.34.88" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.84-py3-none-any.whl", hash = "sha256:7a02f44af32095946587d748ebeb39c3fa15b9d7275307ff612a6760ead47e04"}, - {file = "boto3-1.34.84.tar.gz", hash = "sha256:91e6343474173e9b82f603076856e1d5b7b68f44247bdd556250857a3f16b37b"}, + {file = "boto3-1.34.88-py3-none-any.whl", hash = "sha256:1bd4cef11b7c5f293cede50f3d33ca89fe3413c51f1864f40163c56a732dd6b3"}, + {file = "boto3-1.34.88.tar.gz", hash = "sha256:168894499578a9d69d6f7deb5811952bf4171c51b95749a9aef32cf67bc71f87"}, ] [package.dependencies] -botocore = ">=1.34.84,<1.35.0" +botocore = ">=1.34.88,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -594,25 +594,25 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.84" +version = "1.34.88" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.84-py3-none-any.whl", hash = "sha256:da1ae0a912e69e10daee2a34dafd6c6c106450d20b8623665feceb2d96c173eb"}, - {file = "botocore-1.34.84.tar.gz", hash = "sha256:a2b309bf5594f0eb6f63f355ade79ba575ce8bf672e52e91da1a7933caa245e6"}, + {file = "botocore-1.34.88-py3-none-any.whl", hash = "sha256:e87a660599ed3e14b2a770f4efc3df2f2f6d04f3c7bfd64ddbae186667864a7b"}, + {file = "botocore-1.34.88.tar.gz", hash = "sha256:36f2e9e8dfa856e55dbbe703aea601f134db3fddc3615f1020a755b27fd26a5e"}, ] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" urllib3 = [ - {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, ] [package.extras] -crt = ["awscrt (==0.19.19)"] +crt = ["awscrt (==0.20.9)"] [[package]] name = "cachetools" @@ -1095,8 +1095,8 @@ files = [ [package.dependencies] lz4 = ">=4.0.2,<5.0.0" numpy = [ - {version = ">=1.23.4", markers = "python_version >= \"3.11\""}, {version = ">=1.16.6", markers = "python_version >= \"3.8\" and python_version < \"3.11\""}, + {version = ">=1.23.4", markers = "python_version >= \"3.11\""}, ] oauthlib = ">=3.1.0,<4.0.0" openpyxl = ">=3.0.10,<4.0.0" @@ -1146,8 +1146,8 @@ isort = ">=4.3.21,<6.0" jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] pyyaml = ">=6.0.1" @@ -1610,12 +1610,12 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" @@ -1749,8 +1749,8 @@ google-cloud-audit-log = ">=0.1.0,<1.0.0dev" google-cloud-core = ">=2.0.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ - {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, + {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -3754,8 +3754,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" diff --git a/tests/common/test_filter.py b/tests/common/test_filter.py index e10acf39..88be27a1 100644 --- a/tests/common/test_filter.py +++ b/tests/common/test_filter.py @@ -207,6 +207,64 @@ def test_merge(): f2 = DatasetFilter(excludes={"foo": {"bar": None}}) assert f1.merge(f2) == DatasetFilter(excludes={"foo": {"bar": None}}) + f1 = DatasetFilter( + includes={ + "SNOWFLAKE": None, + "*": {"foo": None}, + } + ) + f2 = DatasetFilter(includes={"*": {"bar": None}}) + assert f1.merge(f2) == DatasetFilter( + includes={ + "SNOWFLAKE": None, + "*": {"foo": None, "bar": None}, + } + ) + + f1 = DatasetFilter( + excludes={ + "SNOWFLAKE": None, + "*": {"foo": None}, + } + ) + f2 = DatasetFilter(excludes={"*": {"bar": None}}) + assert f1.merge(f2) == DatasetFilter( + excludes={ + "SNOWFLAKE": None, + "*": {"foo": None, "bar": None}, + } + ) + + f1 = DatasetFilter( + includes={ + "*": {"foo": {"f1", "f2"}, "bar": None}, + } + ) + f2 = DatasetFilter(includes={"*": {"foo": {"f1", "f3"}, "bar": {"b1"}}}) + assert f1.merge(f2) == DatasetFilter( + includes={ + "*": { + "foo": {"f1", "f2", "f3"}, + "bar": {"b1"}, + }, + } + ) + + f1 = DatasetFilter( + excludes={ + "x": {"foo": {"f1", "f2"}, "bar": None}, + } + ) + f2 = DatasetFilter(excludes={"x": {"foo": {"f1", "f3"}, "bar": {"b1"}}}) + assert f1.merge(f2) == DatasetFilter( + excludes={ + "x": { + "foo": {"f1", "f2", "f3"}, + "bar": {"b1"}, + }, + } + ) + def test_include_database(): # Includes only