Skip to content

Commit

Permalink
Crawler filters aren t merged properly [sc-25950] (#839)
Browse files Browse the repository at this point in the history
  • Loading branch information
alyiwang authored Apr 22, 2024
1 parent 42e3595 commit 771de4b
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 20 deletions.
41 changes: 37 additions & 4 deletions metaphor/common/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,45 @@ class DatasetFilter:
def merge(self, filter: "DatasetFilter") -> "DatasetFilter":
"""Merge with another filter and return a shallow copy"""

def merge_filters(f1: Optional[DatabaseFilter], f2: Optional[DatabaseFilter]):
return f1 if f2 is None else f2 if f1 is None else {**f1, **f2}
def _merge_table_filters(
f1: Optional[TableFilter], f2: Optional[TableFilter]
) -> Optional[TableFilter]:
return f1 if f2 is None else f2 if f1 is None else f1.union(f2)

def _merge_schema_filters(
f1: Optional[SchemaFilter], f2: Optional[SchemaFilter]
) -> Optional[SchemaFilter]:
if f1 is None:
return f2
if f2 is None:
return f1

result = f1.copy() # shallow copy of f1
for key, val in f2.items():
result[key] = _merge_table_filters(f1.get(key, None), val)

return result

def _merge_database_filters(
f1: Optional[DatabaseFilter], f2: Optional[DatabaseFilter]
) -> Optional[DatabaseFilter]:
"""
Merge two database filters, if same key, then merge the schema filters
"""
if f1 is None:
return f2
if f2 is None:
return f1

result = f1.copy() # shallow copy of f1
for key, val in f2.items():
result[key] = _merge_schema_filters(f1.get(key, None), val)

return result

return DatasetFilter(
includes=merge_filters(self.includes, filter.includes),
excludes=merge_filters(self.excludes, filter.excludes),
includes=_merge_database_filters(self.includes, filter.includes),
excludes=_merge_database_filters(self.excludes, filter.excludes),
)

def normalize(self) -> "DatasetFilter":
Expand Down
32 changes: 16 additions & 16 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions tests/common/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,64 @@ def test_merge():
f2 = DatasetFilter(excludes={"foo": {"bar": None}})
assert f1.merge(f2) == DatasetFilter(excludes={"foo": {"bar": None}})

f1 = DatasetFilter(
includes={
"SNOWFLAKE": None,
"*": {"foo": None},
}
)
f2 = DatasetFilter(includes={"*": {"bar": None}})
assert f1.merge(f2) == DatasetFilter(
includes={
"SNOWFLAKE": None,
"*": {"foo": None, "bar": None},
}
)

f1 = DatasetFilter(
excludes={
"SNOWFLAKE": None,
"*": {"foo": None},
}
)
f2 = DatasetFilter(excludes={"*": {"bar": None}})
assert f1.merge(f2) == DatasetFilter(
excludes={
"SNOWFLAKE": None,
"*": {"foo": None, "bar": None},
}
)

f1 = DatasetFilter(
includes={
"*": {"foo": {"f1", "f2"}, "bar": None},
}
)
f2 = DatasetFilter(includes={"*": {"foo": {"f1", "f3"}, "bar": {"b1"}}})
assert f1.merge(f2) == DatasetFilter(
includes={
"*": {
"foo": {"f1", "f2", "f3"},
"bar": {"b1"},
},
}
)

f1 = DatasetFilter(
excludes={
"x": {"foo": {"f1", "f2"}, "bar": None},
}
)
f2 = DatasetFilter(excludes={"x": {"foo": {"f1", "f3"}, "bar": {"b1"}}})
assert f1.merge(f2) == DatasetFilter(
excludes={
"x": {
"foo": {"f1", "f2", "f3"},
"bar": {"b1"},
},
}
)


def test_include_database():
# Includes only
Expand Down

0 comments on commit 771de4b

Please sign in to comment.