Skip to content

Commit

Permalink
Optimize queries used in Snowflake lineage connector (#797)
Browse files Browse the repository at this point in the history
  • Loading branch information
mars-lan authored Mar 14, 2024
1 parent 706ca4c commit 7ef524a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
4 changes: 2 additions & 2 deletions metaphor/snowflake/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,8 @@ def _batch_query_for_access_logs(
JOIN SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
ON a.QUERY_ID = q.QUERY_ID
WHERE EXECUTION_STATUS = 'SUCCESS'
AND START_TIME > %s AND START_TIME <= %s
AND QUERY_START_TIME > %s AND QUERY_START_TIME <= %s
AND q.START_TIME > %s AND q.START_TIME <= %s
AND a.QUERY_START_TIME > %s AND a.QUERY_START_TIME <= %s
{exclude_username_clause(self._query_log_excluded_usernames)}
ORDER BY q.QUERY_ID
LIMIT {self._query_log_fetch_size} OFFSET %s
Expand Down
23 changes: 12 additions & 11 deletions metaphor/snowflake/lineage/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,17 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
cursor.execute(
"""
SELECT COUNT(*)
FROM
SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q,
SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
WHERE q.QUERY_ID = a.QUERY_ID
AND q.EXECUTION_STATUS = 'SUCCESS'
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
JOIN SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
ON q.QUERY_ID = a.QUERY_ID
WHERE q.EXECUTION_STATUS = 'SUCCESS'
AND ARRAY_SIZE(a.BASE_OBJECTS_ACCESSED) > 0
AND ARRAY_SIZE(a.OBJECTS_MODIFIED) > 0
AND a.QUERY_START_TIME > %s
AND q.START_TIME > %s
ORDER BY a.QUERY_START_TIME ASC
""",
(start_date,),
(start_date, start_date),
)
res = cursor.fetchone()
assert res is not None, f"Missing count: {res}"
Expand All @@ -103,18 +103,19 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
str(x): QueryWithParam(
f"""
SELECT a.BASE_OBJECTS_ACCESSED, a.OBJECTS_MODIFIED, q.QUERY_TEXT
FROM
SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q,
SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
WHERE q.QUERY_ID = a.QUERY_ID
AND q.EXECUTION_STATUS = 'SUCCESS'
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q
JOIN SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY a
ON q.QUERY_ID = a.QUERY_ID
WHERE q.EXECUTION_STATUS = 'SUCCESS'
AND ARRAY_SIZE(a.BASE_OBJECTS_ACCESSED) > 0
AND ARRAY_SIZE(a.OBJECTS_MODIFIED) > 0
AND a.QUERY_START_TIME > %s
AND q.START_TIME > %s
ORDER BY a.QUERY_START_TIME ASC
LIMIT {self._batch_size} OFFSET %s
""",
(
start_date,
start_date,
x * self._batch_size,
),
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.13.138"
version = "0.13.139"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down

0 comments on commit 7ef524a

Please sign in to comment.