Skip to content

Commit

Permalink
[sc-26422] Use DIRECT_OBJECTS_ACCESSED instead of `BASE_OBJECTS_ACCESSED` in `ACCESS_HISTORY` (#862)
Browse files Browse the repository at this point in the history

* [sc-26422] Use `DIRECT_OBJECTS_ACCESSED` instead of `BASE_OBJECTS_ACCESSED` in `ACCESS_HISTORY`

* bump version
  • Loading branch information
usefulalgorithm authored May 20, 2024
1 parent 8aebf1d commit 0964991
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
13 changes: 7 additions & 6 deletions metaphor/snowflake/lineage/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"TABLE",
"VIEW",
"MATERIALIZED VIEW",
+ "STREAM",
)


Expand Down Expand Up @@ -81,7 +82,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
JOIN SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
ON q.QUERY_ID = a.QUERY_ID
WHERE q.EXECUTION_STATUS = 'SUCCESS'
- AND ARRAY_SIZE(a.BASE_OBJECTS_ACCESSED) > 0
+ AND ARRAY_SIZE(a.DIRECT_OBJECTS_ACCESSED) > 0
AND ARRAY_SIZE(a.OBJECTS_MODIFIED) > 0
AND a.QUERY_START_TIME > %s
AND q.START_TIME > %s
Expand All @@ -98,12 +99,12 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
queries = {
str(x): QueryWithParam(
f"""
- SELECT a.BASE_OBJECTS_ACCESSED, a.OBJECTS_MODIFIED, q.QUERY_TEXT
+ SELECT a.DIRECT_OBJECTS_ACCESSED, a.OBJECTS_MODIFIED, q.QUERY_TEXT
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
JOIN SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
ON q.QUERY_ID = a.QUERY_ID
WHERE q.EXECUTION_STATUS = 'SUCCESS'
- AND ARRAY_SIZE(a.BASE_OBJECTS_ACCESSED) > 0
+ AND ARRAY_SIZE(a.DIRECT_OBJECTS_ACCESSED) > 0
AND ARRAY_SIZE(a.OBJECTS_MODIFIED) > 0
AND a.QUERY_START_TIME > %s
AND q.START_TIME > %s
Expand Down Expand Up @@ -142,13 +143,13 @@ async def extract(self) -> Collection[ENTITY_TYPES]:

def _parse_access_logs(self, batch_number: str, access_logs: List[Tuple]) -> None:
logger.info(f"access logs batch #{batch_number}")
- for base_objects_accessed, objects_modified, query in access_logs:
+ for direct_objects_accessed, objects_modified, query in access_logs:
try:
- self._parse_access_log(base_objects_accessed, objects_modified, query)
+ self._parse_access_log(direct_objects_accessed, objects_modified, query)
except Exception:
logger.exception(
"Failed to parse access log.\n"
- f"BASE_OBJECTS_ACCESSED: {base_objects_accessed}\n"
+ f"DIRECT_OBJECTS_ACCESSED: {direct_objects_accessed}\n"
f"OBJECTS_MODIFIED: {objects_modified}"
)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
- version = "0.14.4"
+ version = "0.14.5"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down

0 comments on commit 0964991

Please sign in to comment.