From b9e06fd8482ab7270280d1710df623df8da3c061 Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Sat, 4 Nov 2023 18:13:05 -0700 Subject: [PATCH] Handle email-based Snowflake user ID in query logs (#660) --- metaphor/snowflake/extractor.py | 10 ++++++++-- pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/metaphor/snowflake/extractor.py b/metaphor/snowflake/extractor.py index 5d1dc4fa..36d31428 100644 --- a/metaphor/snowflake/extractor.py +++ b/metaphor/snowflake/extractor.py @@ -20,7 +20,7 @@ from metaphor.common.query_history import chunk_query_logs from metaphor.common.snowflake import normalize_snowflake_account from metaphor.common.tag_matcher import tag_datasets -from metaphor.common.utils import chunks, md5_digest, safe_float, start_of_day +from metaphor.common.utils import chunks, is_email, md5_digest, safe_float, start_of_day from metaphor.models.crawler_run_metadata import Platform from metaphor.models.metadata_change_event import ( DataPlatform, @@ -579,6 +579,11 @@ def _parse_query_logs(self, batch_number: str, query_logs: List[Tuple]) -> None: if len(query_text) >= self._query_log_max_query_size: continue + # User IDs can be an email address + user_id, email = ( + (None, username) if is_email(username) else (username, None) + ) + query_log = QueryLog( id=f"{DataPlatform.SNOWFLAKE.name}:{query_id}", query_id=query_id, @@ -587,7 +592,8 @@ def _parse_query_logs(self, batch_number: str, query_logs: List[Tuple]) -> None: start_time=start_time, duration=safe_float(elapsed_time / 1000.0), cost=safe_float(credit), - user_id=username, + user_id=user_id, + email=email, default_database=default_database, default_schema=default_schema, rows_read=safe_float(rows_produced), diff --git a/pyproject.toml b/pyproject.toml index a4ac4470..8c0b31cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.13.31" +version = "0.13.32" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "]