diff --git a/tests/bigquery/lineage/data/result.json b/tests/bigquery/data/result.json similarity index 100% rename from tests/bigquery/lineage/data/result.json rename to tests/bigquery/data/result.json diff --git a/tests/bigquery/lineage/data/sample_log.json b/tests/bigquery/data/sample_log.json similarity index 100% rename from tests/bigquery/lineage/data/sample_log.json rename to tests/bigquery/data/sample_log.json diff --git a/tests/bigquery/data/view_result.json b/tests/bigquery/data/view_result.json new file mode 100644 index 00000000..638869cd --- /dev/null +++ b/tests/bigquery/data/view_result.json @@ -0,0 +1,104 @@ +[ + { + "entityUpstream": { + "sourceEntities": [ + "DATASET~D61A8E52A48057B7A3DD4124F0894B06" + ], + "transformation": "select * from `foo`" + }, + "logicalId": { + "name": "project1.dataset1.table1", + "platform": "BIGQUERY" + }, + "schema": { + "description": "description", + "fields": [], + "schemaType": "SQL", + "sqlSchema": { + "materialization": "VIEW", + "tableSchema": "select * from `foo`" + } + }, + "sourceInfo": { + "createdAtSource": "2000-01-01T00:00:00+00:00", + "lastUpdated": "2000-01-01T00:00:00+00:00" + }, + "statistics": { + "dataSizeBytes": 0.0, + "recordCount": 0.0 + }, + "structure": { + "database": "project1", + "schema": "dataset1", + "table": "table1" + } + }, + { + "entityUpstream": { + "sourceEntities": [ + "DATASET~D61A8E52A48057B7A3DD4124F0894B06" + ], + "transformation": "select * from `Foo`" + }, + "logicalId": { + "name": "project1.dataset1.table2", + "platform": "BIGQUERY" + }, + "schema": { + "description": "description", + "fields": [], + "schemaType": "SQL", + "sqlSchema": { + "materialization": "VIEW", + "tableSchema": "select * from `Foo`" + } + }, + "sourceInfo": { + "createdAtSource": "2000-01-01T00:00:00+00:00", + "lastUpdated": "2000-01-01T00:00:00+00:00" + }, + "statistics": { + "dataSizeBytes": 0.0, + "recordCount": 0.0 + }, + "structure": { + "database": "project1", + "schema": "dataset1", + "table": "table2" + } + }, + { + "entityUpstream": { + "sourceEntities": [ + "DATASET~D61A8E52A48057B7A3DD4124F0894B06" + ], + "transformation": "select * from foo" + }, + "logicalId": { + "name": "project1.dataset1.table3", + "platform": "BIGQUERY" + }, + "schema": { + "description": "description", + "fields": [], + "schemaType": "SQL", + "sqlSchema": { + "materialization": "VIEW", + "tableSchema": "select * from foo" + } + }, + "sourceInfo": { + "createdAtSource": "2000-01-01T00:00:00+00:00", + "lastUpdated": "2000-01-01T00:00:00+00:00" + }, + "statistics": { + "dataSizeBytes": 0.0, + "recordCount": 0.0 + }, + "structure": { + "database": "project1", + "schema": "dataset1", + "table": "table3" + } + } +] diff --git a/tests/bigquery/lineage/data/view_result.json b/tests/bigquery/lineage/data/view_result.json deleted file mode 100644 index d345333f..00000000 --- a/tests/bigquery/lineage/data/view_result.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "logicalId": { - "name": "project1.dataset1.table1", - "platform": "BIGQUERY" - }, - "entityUpstream": { - "sourceEntities": [ - "DATASET~A6BA6F986B360A57CF65200F29F5B251" - ], - "transformation": "select * from `foo`" - } - }, - { - "logicalId": { - "name": "project1.dataset1.table2", - "platform": "BIGQUERY" - }, - "entityUpstream": { - "sourceEntities": [ - "DATASET~A6BA6F986B360A57CF65200F29F5B251" - ], - "transformation": "select * from `Foo`" - } - }, - { - "logicalId": { - "name": "project1.dataset1.table3", - "platform": "BIGQUERY" - }, - "entityUpstream": { - "sourceEntities": [ - "DATASET~A6BA6F986B360A57CF65200F29F5B251" - ], - "transformation": "select * from foo" - } - } -] diff --git a/tests/bigquery/lineage/test_extractor.py b/tests/bigquery/lineage/test_extractor.py deleted file mode 100644 index 279fa879..00000000 --- a/tests/bigquery/lineage/test_extractor.py +++ /dev/null @@ -1,130 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from metaphor.bigquery.lineage.config import BigQueryLineageRunConfig -from metaphor.bigquery.lineage.extractor import BigQueryLineageExtractor -from metaphor.common.base_config import OutputConfig -from metaphor.common.event_util import EventUtil -from tests.bigquery.load_entries import load_entries -from tests.bigquery.test_extractor import ( - mock_dataset, - mock_get_table, - mock_list_datasets, - mock_list_tables, - mock_table, - mock_table_full, -) -from tests.test_utils import load_json - - -def mock_list_entries(mock_build_log_client, entries): - def side_effect(page_size, filter_): - return entries - - mock_build_log_client.return_value.list_entries.side_effect = side_effect - - -@pytest.mark.skip -@patch("metaphor.bigquery.lineage.extractor.build_client") -@patch("metaphor.bigquery.lineage.extractor.build_logging_client") -@patch("metaphor.bigquery.lineage.extractor.get_credentials") -@pytest.mark.asyncio -async def test_log_extractor( - mock_get_credentials: MagicMock, - mock_build_logging_client: MagicMock, - mock_build_client: MagicMock, - test_root_dir: str, -): - config = BigQueryLineageRunConfig( - project_ids=["pid"], - output=OutputConfig(), - key_path="fake_file", - enable_view_lineage=False, - include_self_lineage=True, - ) - - entries = load_entries(test_root_dir + "/bigquery/lineage/data/sample_log.json") - - extractor = BigQueryLineageExtractor(config) - - mock_get_credentials.return_value = "fake_credentials" - - mock_build_logging_client.return_value.project = "project1" - - mock_list_entries(mock_build_logging_client, entries) - - events = [EventUtil.trim_event(e) for e in await extractor.extract()] - - assert events == load_json(test_root_dir + "/bigquery/lineage/data/result.json") - - -@pytest.mark.skip -@patch("metaphor.bigquery.lineage.extractor.build_client") -@patch("metaphor.bigquery.lineage.extractor.build_logging_client") -@patch("metaphor.bigquery.lineage.extractor.get_credentials") -@pytest.mark.asyncio -async def test_view_extractor( - mock_get_credentials: MagicMock, - mock_build_logging_client: MagicMock, - mock_build_client: MagicMock, - test_root_dir: str, -): - config = BigQueryLineageRunConfig( - output=OutputConfig(), - key_path="fake_file", - project_ids=["fake_project"], - enable_lineage_from_log=False, - ) - - extractor = BigQueryLineageExtractor(config) - - mock_get_credentials.return_value = "fake_credentials" - - mock_build_client.return_value.project = "project1" - - mock_list_datasets(mock_build_client, [mock_dataset("dataset1")]) - - mock_list_tables( - mock_build_client, - { - "dataset1": [ - mock_table("dataset1", "table1"), - mock_table("dataset1", "table2"), - mock_table("dataset1", "table3"), - ], - }, - ) - - mock_get_table( - mock_build_client, - { - ("dataset1", "table1"): mock_table_full( - dataset_id="dataset1", - table_id="table1", - table_type="VIEW", - description="description", - view_query="select * from `foo`", - ), - ("dataset1", "table2"): mock_table_full( - dataset_id="dataset1", - table_id="table2", - table_type="VIEW", - description="description", - view_query="select * from `Foo`", - ), - ("dataset1", "table3"): mock_table_full( - dataset_id="dataset1", - table_id="table3", - table_type="VIEW", - description="description", - view_query="select * from foo", - ), - }, - ) - - events = [EventUtil.trim_event(e) for e in await extractor.extract()] - - assert events == load_json( - f"{test_root_dir}/bigquery/lineage/data/view_result.json" - ) diff --git a/tests/bigquery/test_extractor.py b/tests/bigquery/test_extractor.py index 7ef53d20..06a6b50a 100644 --- a/tests/bigquery/test_extractor.py +++ b/tests/bigquery/test_extractor.py @@ -205,3 +205,107 @@ async def test_extractor( query_logs = wrap_query_log_stream_to_event(extractor.collect_query_logs()) expected_query_logs = f"{test_root_dir}/bigquery/query_logs.json" assert query_logs == load_json(expected_query_logs) + + +@patch("metaphor.bigquery.extractor.build_client") +@patch("metaphor.bigquery.extractor.build_logging_client") +@patch("metaphor.bigquery.extractor.get_credentials") +@pytest.mark.asyncio +async def test_extract_view_upstream( + mock_get_credentials: MagicMock, + mock_build_logging_client: MagicMock, + mock_build_client: MagicMock, + test_root_dir: str, +) -> None: + config = BigQueryRunConfig( + project_ids=["project1"], + output=OutputConfig(), + key_path="fake_file", + lineage=BigQueryLineageConfig( + enable_lineage_from_log=False, + ), + ) + extractor = BigQueryExtractor(config) + + mock_get_credentials.return_value = "fake_credentials" + + mock_build_client.return_value.project = "project1" + + mock_list_datasets(mock_build_client, [mock_dataset("dataset1")]) + + mock_list_tables( + mock_build_client, + { + "dataset1": [ + mock_table("dataset1", "table1"), + mock_table("dataset1", "table2"), + mock_table("dataset1", "table3"), + ], + }, + ) + + mock_get_table( + mock_build_client, + { + ("dataset1", "table1"): mock_table_full( + dataset_id="dataset1", + table_id="table1", + table_type="VIEW", + description="description", + view_query="select * from `foo`", + ), + ("dataset1", "table2"): mock_table_full( + dataset_id="dataset1", + table_id="table2", + table_type="VIEW", + description="description", + view_query="select * from `Foo`", + ), + ("dataset1", "table3"): mock_table_full( + dataset_id="dataset1", + table_id="table3", + table_type="VIEW", + description="description", + view_query="select * from foo", + ), + }, + ) + + events = [EventUtil.trim_event(e) for e in await extractor.extract()] + + assert events == load_json(f"{test_root_dir}/bigquery/data/view_result.json") + + +@patch("metaphor.bigquery.extractor.build_client") +@patch("metaphor.bigquery.extractor.build_logging_client") +@patch("metaphor.bigquery.extractor.get_credentials") +@pytest.mark.asyncio +async def test_log_extractor( + mock_get_credentials: MagicMock, + mock_build_logging_client: MagicMock, + mock_build_client: MagicMock, + test_root_dir: str, +): + config = BigQueryRunConfig( + project_ids=["project1"], + output=OutputConfig(), + key_path="fake_file", + lineage=BigQueryLineageConfig( + enable_view_lineage=False, + include_self_lineage=True, + ), + ) + + entries = load_entries(test_root_dir + "/bigquery/data/sample_log.json") + + extractor = BigQueryExtractor(config) + + mock_get_credentials.return_value = "fake_credentials" + + mock_build_logging_client.return_value.project = "project1" + + mock_list_entries(mock_build_logging_client, entries) + + events = [EventUtil.trim_event(e) for e in await extractor.extract()] + + assert events == load_json(test_root_dir + "/bigquery/data/result.json") diff --git a/tests/bigquery/test_parser.py b/tests/bigquery/test_parser.py index fd6ea9e0..aae7dffc 100644 --- a/tests/bigquery/test_parser.py +++ b/tests/bigquery/test_parser.py @@ -8,7 +8,7 @@ def test_parse_log(test_root_dir): - logs = load_entries(test_root_dir + "/bigquery/lineage/data/sample_log.json") + logs = load_entries(test_root_dir + "/bigquery/data/sample_log.json") results = [JobChangeEvent.from_entry(log) for log in logs]