diff --git a/metaphor/athena/config.py b/metaphor/athena/config.py index 046d6520..288541f9 100644 --- a/metaphor/athena/config.py +++ b/metaphor/athena/config.py @@ -8,17 +8,9 @@ from metaphor.common.filter import DatasetFilter -@dataclass(config=ConnectorConfig) -class QueryLogConfig: - # Number of days back of query logs to fetch, if 0, don't fetch query logs - lookback_days: int = 1 - - @dataclass(config=ConnectorConfig) class AthenaRunConfig(BaseConfig): aws: AwsCredentials # Include or exclude specific databases/schemas/tables filter: DatasetFilter = field(default_factory=lambda: DatasetFilter()) - - query_log: QueryLogConfig = field(default_factory=lambda: QueryLogConfig()) diff --git a/metaphor/athena/extractor.py b/metaphor/athena/extractor.py index 4ad283a0..78080c9e 100644 --- a/metaphor/athena/extractor.py +++ b/metaphor/athena/extractor.py @@ -15,7 +15,7 @@ from metaphor.common.sql.table_level_lineage.table_level_lineage import ( extract_table_level_lineage, ) -from metaphor.common.utils import chunks, md5_digest +from metaphor.common.utils import chunks, md5_digest, to_utc_time from metaphor.models.crawler_run_metadata import Platform from metaphor.models.metadata_change_event import ( DataPlatform, @@ -203,6 +203,13 @@ def _batch_get_queries(self, query_execution_ids: List[str]) -> List[QueryLog]: default_schema=schema, ) + start_time = ( + to_utc_time(query_execution.Status.SubmissionDateTime) + if query_execution.Status + and query_execution.Status.SubmissionDateTime + else None + ) + query_logs.append( QueryLog( duration=( @@ -216,6 +223,7 @@ def _batch_get_queries(self, query_execution_ids: List[str]) -> List[QueryLog]: targets=tll.targets, sql=query, sql_hash=md5_digest(query.encode("utf-8")), + start_time=start_time, ) ) diff --git a/tests/athena/expected_query_logs.json b/tests/athena/expected_query_logs.json index 1ab4adcb..74c56618 100644 --- a/tests/athena/expected_query_logs.json +++ b/tests/athena/expected_query_logs.json @@ -13,6 +13,7 @@ ], "sql": "SELECT * FROM \"spectrum_db2\".\"sales\" limit 10", "sqlHash": "ab84f70dad2f22c7680b41d0dc8ca73d", + "startTime": "2024-10-03T00:50:12.646000+00:00", "targets": [] }, { @@ -29,6 +30,7 @@ ], "sql": "SELECT * FROM \"spectrum_db2\".\"sales\" limit 10", "sqlHash": "ab84f70dad2f22c7680b41d0dc8ca73d", + "startTime": "2024-10-02T16:48:48.443000+00:00", "targets": [] }, { @@ -45,6 +47,7 @@ ], "sql": "-- View Example\nCREATE OR REPLACE VIEW sales_view AS\nSELECT salesid, listid, sellerid, buyerid, dateid, qtysold, pricepaid, commission, saletime\nFROM sales\nWHERE commission > 10", "sqlHash": "f89a26b30eb0e4fac013ff31f4ea900e", + "startTime": "2024-10-03T00:50:38.339000+00:00", "targets": [ { "database": "awsdatacatalog",