From 40283d80cd3e417330d2311002dc78be5b17eb91 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 21:54:26 +0800 Subject: [PATCH 1/6] Add sourceInfo.mainUrl for PBI --- metaphor/power_bi/extractor.py | 1 + tests/power_bi/expected.json | 171 +++++++++++++++++---------------- 2 files changed, 91 insertions(+), 81 deletions(-) diff --git a/metaphor/power_bi/extractor.py b/metaphor/power_bi/extractor.py index c31be92d..82cd7529 100644 --- a/metaphor/power_bi/extractor.py +++ b/metaphor/power_bi/extractor.py @@ -394,6 +394,7 @@ async def map_wi_datasets_to_virtual_views(self, workspace: WorkspaceInfo) -> No wds.sensitivityLabel.labelId if wds.sensitivityLabel else None ), ), + source_info=SourceInfo(main_url=ds.webUrl), ) self._extract_pipeline_info(wds, virtual_view) diff --git a/tests/power_bi/expected.json b/tests/power_bi/expected.json index fe74ddf9..4b8a821b 100644 --- a/tests/power_bi/expected.json +++ b/tests/power_bi/expected.json @@ -7,6 +7,7 @@ "powerBIDataset": { "description": "This is a dataset", "lastRefreshed": "2022-01-01T01:02:03.456000+00:00", + "name": "Foo Dataset", "parameters": [ { "isRequired": false, @@ -15,7 +16,17 @@ "value": "value" } ], - "name": "Foo Dataset", + "refreshSchedule": { + "days": [ + "Monday" + ], + "enabled": true, + "localTimeZoneId": "Pacific Standard Time", + "notifyOption": "MailOnFailure", + "times": [ + "10:00" + ] + }, "tables": [ { "columns": [ @@ -44,18 +55,10 @@ } ], "url": "https://powerbi.com/00000000-0000-0000-0000-000000000002", - "workspaceId": "workspace-1", - "refreshSchedule": { - "days": [ - "Monday" - ], - "enabled": true, - "localTimeZoneId": "Pacific Standard Time", - "notifyOption": "MailOnFailure", - "times": [ - "10:00" - ] - } + "workspaceId": "workspace-1" + }, + "sourceInfo": { + "mainUrl": "https://powerbi.com/00000000-0000-0000-0000-000000000002" }, "structure": { "directories": [ @@ -70,6 +73,8 @@ "type": "POWER_BI_DATASET" }, "powerBIDataset": { + "configuredBy": "bob@foo.com", + "createdDate": "2022-03-30T23:28:21.060000+00:00", "description": "This is another dataset", "name": "Bar Dataset", "parameters": [ @@ -80,6 +85,14 @@ "value": "value" } ], + "refreshSchedule": { + "days": [], + "enabled": true, + "frequencyInMinutes": 120.0, + "localTimeZoneId": "Pacific Standard Time", + "notifyOption": "MailOnFailure", + "times": [] + }, "tables": [ { "columns": [ @@ -113,17 +126,10 @@ } ], "url": "https://powerbi.com/00000000-0000-0000-0000-000000000003", - "workspaceId": "workspace-1", - "refreshSchedule": { - "days": [], - "enabled": true, - "localTimeZoneId": "Pacific Standard Time", - "notifyOption": "MailOnFailure", - "frequencyInMinutes": 120.0, - "times": [] - }, - "configuredBy": "bob@foo.com", - "createdDate": "2022-03-30T23:28:21.060000+00:00" + "workspaceId": "workspace-1" + }, + "sourceInfo": { + "mainUrl": "https://powerbi.com/00000000-0000-0000-0000-000000000003" }, "structure": { "directories": [ @@ -151,6 +157,7 @@ ] }, "powerBIDataset": { + "configuredBy": "bob@foo.com", "description": "Dataset from dataflow", "name": "Dataflow dataset", "parameters": [ @@ -161,6 +168,11 @@ "value": "value" } ], + "sensitivityLabel": { + "description": "label description", + "id": "label-id", + "name": "label name" + }, "tables": [ { "columns": [ @@ -179,13 +191,10 @@ } ], "url": "https://powerbi.com/00000000-0000-0000-0001-000000000003", - "workspaceId": "workspace-1", - "configuredBy": "bob@foo.com", - "sensitivityLabel": { - "description": "label description", - "id": "label-id", - "name": "label name" - } + "workspaceId": "workspace-1" + }, + "sourceInfo": { + "mainUrl": "https://powerbi.com/00000000-0000-0000-0001-000000000003" }, "structure": { "directories": [ @@ -204,8 +213,6 @@ "id": "00000000-0000-0000-0000-000000000000", "name": "foo app" }, - "powerBiDashboardType": "REPORT", - "workspaceId": "workspace-1", "createdBy": "creator@foo.bar", "createdDateTime": "2022-04-06T04:25:06.777000+00:00", "endorsement": { @@ -213,10 +220,17 @@ "endorsement": "Promoted" }, "modifiedBy": "editor@foo.bar", - "modifiedDateTime": "2022-04-06T04:25:06.777000+00:00" + "modifiedDateTime": "2022-04-06T04:25:06.777000+00:00", + "powerBiDashboardType": "REPORT", + "workspaceId": "workspace-1" }, "title": "Foo Report" }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6" + ] + }, "logicalId": { "dashboardId": "00000000-0000-0000-0000-000000000004", "platform": "POWER_BI" @@ -229,11 +243,6 @@ "Workspace" ], "name": "Foo Report" - }, - "entityUpstream": { - "sourceEntities": [ - "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6" - ] } }, { @@ -252,15 +261,20 @@ "description": "This is a report about bar", "powerBi": { "powerBiDashboardType": "REPORT", - "workspaceId": "workspace-1", "sensitivityLabel": { "description": "label description", "id": "label-id", "name": "label name" - } + }, + "workspaceId": "workspace-1" }, "title": "Bar Report" }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" + ] + }, "logicalId": { "dashboardId": "00000000-0000-0000-0000-000000000006", "platform": "POWER_BI" @@ -273,11 +287,6 @@ "Workspace" ], "name": "Bar Report" - }, - "entityUpstream": { - "sourceEntities": [ - "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" - ] } }, { @@ -300,15 +309,23 @@ "id": "00000000-0000-0000-0000-000000000001", "name": "bar app" }, + "createdBy": "creator@foo.bar", + "createdDateTime": "2022-04-06T04:25:06.777000+00:00", + "modifiedBy": "editor@foo.bar", + "modifiedDateTime": "2022-04-06T04:25:06.777000+00:00", "powerBiDashboardType": "DASHBOARD", - "workspaceId": "workspace-1", + "sensitivityLabel": { + "description": "label description", + "id": "label-id", + "name": "label name" + }, "subscriptions": [ { "artifactDisplayName": "Dashboard A", "endDate": "2000-09-06T00:13:52+00:00", - "startDate": "1998-11-30T17:05:52+00:00", "frequency": "Daily", "id": "subscription-1", + "startDate": "1998-11-30T17:05:52+00:00", "title": "First Subscription", "users": [ { @@ -318,18 +335,16 @@ ] } ], - "createdBy": "creator@foo.bar", - "createdDateTime": "2022-04-06T04:25:06.777000+00:00", - "modifiedBy": "editor@foo.bar", - "modifiedDateTime": "2022-04-06T04:25:06.777000+00:00", - "sensitivityLabel": { - "description": "label description", - "id": "label-id", - "name": "label name" - } + "workspaceId": "workspace-1" }, "title": "Dashboard A" }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6", + "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" + ] + }, "logicalId": { "dashboardId": "00000000-0000-0000-0000-000000000007", "platform": "POWER_BI" @@ -342,12 +357,6 @@ "Workspace" ], "name": "Dashboard A" - }, - "entityUpstream": { - "sourceEntities": [ - "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6", - "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" - ] } }, { @@ -371,6 +380,12 @@ }, "title": "Dashboard B" }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6", + "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" + ] + }, "logicalId": { "dashboardId": "00000000-0000-0000-0000-000000000009", "platform": "POWER_BI" @@ -383,12 +398,6 @@ "Workspace" ], "name": "Dashboard B" - }, - "entityUpstream": { - "sourceEntities": [ - "VIRTUAL_VIEW~81496746E0F360F2569347B8C8335CE6", - "VIRTUAL_VIEW~CC1F3CC68F843D0E144687C9030A255B" - ] } }, { @@ -398,8 +407,10 @@ }, "powerBiDataflow": { "content": "{\"name\": \"Dataflow\", \"description\": \"A dataflow\", \"version\": \"1.0\", \"culture\": \"en-US\", \"modifiedTime\": \"2023-09-19T06:08:01.3550729+00:00\", \"ppdf:outputFileFormat\": \"Csv\", \"pbi:mashup\": {\"fastCombine\": false, \"allowNativeQueries\": false, \"queriesMetadata\": {\"TABLE_NAME\": {\"queryId\": \"query_id\", \"queryName\": \"TABLE_NAME\", \"loadEnabled\": true}}, \"document\": \"section Section1;\\r\\nshared ENTITY_NAME = let\\r\\n Source = Snowflake.Databases(\\\"account.snowflakecomputing.com\\\", \\\"COMPUTE_WH\\\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\\r\\n Navigation = Source{[Name = \\\"DB\\\", Kind = \\\"Database\\\"]}[Data],\\r\\n #\\\"Navigation 1\\\" = Navigation{[Name = \\\"SCHEMA\\\", Kind = \\\"Schema\\\"]}[Data],\\r\\n #\\\"Navigation 2\\\" = #\\\"Navigation 1\\\"{[Name = \\\"TABLE_NAME\\\", Kind = \\\"Table\\\"]}[Data]\\r\\nin\\r\\n #\\\"Navigation 2\\\";\\r\\n\", \"connectionOverrides\": [{\"path\": \"account.snowflakecomputing.com;COMPUTE_WH\", \"kind\": \"Snowflake\", \"provider\": \"CdsA\", \"authenticationKind\": null, \"environmentName\": null, \"apiName\": null, \"connectionName\": \"{\\\"kind\\\":\\\"Snowflake\\\",\\\"path\\\":\\\"account.snowflakecomputing.com;COMPUTE_WH\\\"}\", \"audience\": null}]}, \"annotations\": [{\"name\": \"pbi:QueryGroups\", \"value\": \"[]\"}], \"entities\": [{\"$type\": \"LocalEntity\", \"name\": \"ENTITY_NAME\", \"description\": \"\", \"pbi:refreshPolicy\": {\"$type\": \"FullRefreshPolicy\", \"location\": \"ENTITY_NAME.csv\"}, \"attributes\": [{\"name\": \"COL_1\", \"dataType\": \"double\"}, {\"name\": \"COL_2\", \"dataType\": \"double\"}], \"partitions\": [{\"name\": \"FullRefreshPolicyPartition\", \"refreshTime\": \"2023-09-19T06:21:43.1195053+00:00\", \"location\": \"location-url\"}]}]}", + "dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0001-00000000000A", "description": "A dataflow", "document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = Snowflake.Databases(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n", + "lastRefreshed": "2023-10-19T01:06:10.290000+00:00", "modifiedDateTime": "2023-09-19T06:08:01.355072+00:00", "name": "Dataflow", "refreshSchedule": { @@ -413,9 +424,7 @@ "1:00:00" ] }, - "dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0001-00000000000A", - "workspaceId": "workspace-1", - "lastRefreshed": "2023-10-19T01:06:10.290000+00:00" + "workspaceId": "workspace-1" } }, { @@ -428,13 +437,14 @@ "dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0002-00000000000A", "description": "", "document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n", + "lastRefreshed": "2023-10-19T01:06:10.290000+00:00", "name": "Dataflow 2", - "workspaceId": "workspace-1", - "lastRefreshed": "2023-10-19T01:06:10.290000+00:00" + "workspaceId": "workspace-1" } }, { "hierarchyInfo": { + "description": "workspace desc", "name": "Workspace", "powerBiWorkspace": { "name": "Workspace", @@ -461,7 +471,6 @@ } ] }, - "description": "workspace desc", "type": "POWER_BI_WORKSPACE" }, "logicalId": { @@ -473,6 +482,7 @@ }, { "hierarchyInfo": { + "description": "workspace desc", "name": "Workspace", "powerBiWorkspace": { "name": "Workspace", @@ -499,7 +509,6 @@ } ] }, - "description": "workspace desc", "type": "POWER_BI_WORKSPACE" }, "logicalId": { @@ -511,6 +520,7 @@ }, { "hierarchyInfo": { + "description": "workspace desc", "name": "Workspace", "powerBiWorkspace": { "name": "Workspace", @@ -537,7 +547,6 @@ } ] }, - "description": "workspace desc", "type": "POWER_BI_WORKSPACE" }, "logicalId": { @@ -551,13 +560,13 @@ "_id": "activity-id", "activityType": "VIEW", "actor": "PERSON~6FB4F4EEE50BD1233071D70B74463061", + "actorInfo": { + "email": "test@foo.bar" + }, "durationInSeconds": 0.0, "entityId": "DASHBOARD~E264A8C3DE81E67DB3A9A2DC55B940D6", "measure": 1.0, "source": "POWER_BI", - "timestamp": "2023-10-17T01:00:00", - "actorInfo": { - "email": "test@foo.bar" - } + "timestamp": "2023-10-17T01:00:00" } ] From 7492c580d582c4cee810292f20d4b1303f7e8683 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 22:10:16 +0800 Subject: [PATCH 2/6] Add sourceInfo.mainUrl for looker --- metaphor/looker/lookml_parser.py | 3 +++ tests/looker/test_lookml_parser.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/metaphor/looker/lookml_parser.py b/metaphor/looker/lookml_parser.py index 52cee8bf..151c8ac8 100644 --- a/metaphor/looker/lookml_parser.py +++ b/metaphor/looker/lookml_parser.py @@ -29,6 +29,7 @@ LookerView, LookerViewDimension, LookerViewMeasure, + SourceInfo, SystemTag, SystemTags, SystemTagSource, @@ -265,6 +266,7 @@ def _build_looker_view( entity_upstream=( EntityUpstream(source_entities=source_entities) if source_entities else None ), + source_info=SourceInfo(main_url=url), ) @@ -352,6 +354,7 @@ def _build_looker_explore( structure=_get_model_asset_structure(model, name, explore_view_folder_name), entity_upstream=EntityUpstream(source_entities=source_entities), system_tags=SystemTags(tags=tags), + source_info=SourceInfo(main_url=url), ) diff --git a/tests/looker/test_lookml_parser.py b/tests/looker/test_lookml_parser.py index dc9284a8..5b4c7beb 100644 --- a/tests/looker/test_lookml_parser.py +++ b/tests/looker/test_lookml_parser.py @@ -12,6 +12,7 @@ LookerView, LookerViewDimension, LookerViewMeasure, + SourceInfo, SystemTag, SystemTags, SystemTagSource, @@ -99,6 +100,9 @@ def test_basic(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model1"], name="view1", ), + source_info=SourceInfo( + main_url="http://foo/files/view1.view.lkml", + ), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -130,6 +134,9 @@ def test_basic(test_root_dir): ), ] ), + source_info=SourceInfo( + main_url="http://foo/files/model1.model.lkml", + ), ), ] @@ -199,6 +206,7 @@ def test_join(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model1"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -218,6 +226,7 @@ def test_join(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model1"], name="view2", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -251,6 +260,7 @@ def test_join(test_root_dir): source_entities=[str(virtual_view_id1), str(virtual_view_id2)] ), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ], ) @@ -307,6 +317,7 @@ def test_explore_in_view(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model1"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -324,6 +335,7 @@ def test_explore_in_view(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ], ) @@ -378,6 +390,7 @@ def test_derived_table(test_root_dir): source_platform=DataPlatform.SNOWFLAKE, ) ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -396,6 +409,7 @@ def test_derived_table(test_root_dir): source_platform=DataPlatform.SNOWFLAKE, ), ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -407,6 +421,7 @@ def test_derived_table(test_root_dir): ), looker_view=LookerView(), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id1)]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -422,6 +437,7 @@ def test_derived_table(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -437,6 +453,7 @@ def test_derived_table(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id2)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -452,6 +469,7 @@ def test_derived_table(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id3)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ] @@ -497,6 +515,7 @@ def test_sql_table_name(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -512,6 +531,7 @@ def test_sql_table_name(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ] @@ -552,6 +572,7 @@ def test_include_relative_to_model(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -565,6 +586,7 @@ def test_include_relative_to_model(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view2", ), + source_info=SourceInfo(), ), ] @@ -634,6 +656,7 @@ def test_complex_includes(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -647,6 +670,7 @@ def test_complex_includes(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view2", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -660,6 +684,7 @@ def test_complex_includes(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view3", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -673,6 +698,7 @@ def test_complex_includes(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view4", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -688,6 +714,7 @@ def test_complex_includes(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ] @@ -747,6 +774,7 @@ def test_view_extension(test_root_dir): source_datasets=[str(dataset_table1)], ), entity_upstream=EntityUpstream(source_entities=[str(dataset_table1)]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -764,6 +792,7 @@ def test_view_extension(test_root_dir): source_platform=DataPlatform.BIGQUERY, ) ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -781,6 +810,7 @@ def test_view_extension(test_root_dir): source_platform=DataPlatform.BIGQUERY, ) ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -794,6 +824,7 @@ def test_view_extension(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view4", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -811,6 +842,7 @@ def test_view_extension(test_root_dir): source_platform=DataPlatform.BIGQUERY, ) ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -824,6 +856,7 @@ def test_view_extension(test_root_dir): source_datasets=[str(dataset_base_view3)], ), entity_upstream=EntityUpstream(source_entities=[str(dataset_base_view3)]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -839,6 +872,7 @@ def test_view_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view_id1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ] @@ -914,6 +948,7 @@ def test_explore_extension(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view1", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -927,6 +962,7 @@ def test_explore_extension(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view2", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -940,6 +976,7 @@ def test_explore_extension(test_root_dir): directories=[VIEW_EXPLORE_FOLDER, "model"], name="view3", ), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -955,6 +992,7 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -970,6 +1008,7 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view2)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -985,6 +1024,7 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view1)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -1000,6 +1040,7 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view3)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -1015,6 +1056,7 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view2)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), VirtualView( logical_id=VirtualViewLogicalID( @@ -1030,5 +1072,6 @@ def test_explore_extension(test_root_dir): ), entity_upstream=EntityUpstream(source_entities=[str(virtual_view3)]), system_tags=SystemTags(tags=[]), + source_info=SourceInfo(), ), ] From 43f077748eea49f041db6a70d64f60ee45d3bb00 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 22:18:23 +0800 Subject: [PATCH 3/6] Add sourceInfo.mainUrl for tableau --- metaphor/tableau/extractor.py | 13 +++++++------ tests/tableau/expected.json | 11 ++++++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/metaphor/tableau/extractor.py b/metaphor/tableau/extractor.py index b9afbb2e..dd245dff 100644 --- a/metaphor/tableau/extractor.py +++ b/metaphor/tableau/extractor.py @@ -438,6 +438,7 @@ def _parse_workbook_query_response( server, published_source.owner.luid ) + url = f"{self._base_url}/datasources/{published_source.vizportalUrlId}" self._virtual_views[published_source.luid] = VirtualView( logical_id=VirtualViewLogicalID( type=VirtualViewType.TABLEAU_DATASOURCE, name=published_source.luid @@ -458,7 +459,7 @@ def _parse_workbook_query_response( source_dataset_account=( custom_sql_source.account if custom_sql_source else None ), - url=f"{self._base_url}/datasources/{published_source.vizportalUrlId}", + url=url, source_datasets=source_datasets or None, ), entity_upstream=( @@ -468,6 +469,7 @@ def _parse_workbook_query_response( ), system_tags=system_tags, system_contacts=system_contacts, + source_info=SourceInfo(main_url=url), ) source_virtual_views.append(virtual_view_id) published_datasources.append(published_source.name) @@ -492,6 +494,8 @@ def _parse_workbook_query_response( custom_sql_source.sources if custom_sql_source else None ) or self._parse_upstream_datasets(embedded_source.upstreamTables) + url = dashboard.source_info.main_url if dashboard.source_info else None + self._virtual_views[embedded_source.id] = VirtualView( logical_id=VirtualViewLogicalID( type=VirtualViewType.TABLEAU_DATASOURCE, name=embedded_source.id @@ -515,11 +519,7 @@ def _parse_workbook_query_response( custom_sql_source.account if custom_sql_source else None ), source_datasets=source_datasets or None, - url=( - dashboard.source_info.main_url - if dashboard.source_info - else None - ), + url=url, ), entity_upstream=( EntityUpstream(source_entities=source_datasets) @@ -527,6 +527,7 @@ def _parse_workbook_query_response( else None ), system_tags=system_tags, + source_info=SourceInfo(main_url=url), ) source_virtual_views.append(virtual_view_id) diff --git a/tests/tableau/expected.json b/tests/tableau/expected.json index 48c6e2bd..ee3aa4de 100644 --- a/tests/tableau/expected.json +++ b/tests/tableau/expected.json @@ -56,6 +56,9 @@ "name": "sourceId1", "type": "TABLEAU_DATASOURCE" }, + "sourceInfo": { + "mainUrl": "https://10ax.online.tableau.com/#/site/abc/datasources/777" + }, "structure": { "directories": [ "parent", @@ -103,6 +106,9 @@ "name": "sourceId2", "type": "TABLEAU_DATASOURCE" }, + "sourceInfo": { + "mainUrl": "https://10ax.online.tableau.com/#/site/abc/workbooks/123" + }, "structure": { "directories": [ "default" @@ -141,6 +147,9 @@ "name": "sourceId3", "type": "TABLEAU_DATASOURCE" }, + "sourceInfo": { + "mainUrl": "https://10ax.online.tableau.com/#/site/abc/workbooks/123" + }, "structure": { "directories": [ "default" @@ -164,10 +173,10 @@ "fields": [], "name": "default.source3", "query": "select * from db.schema.table", - "source_platform": "BIGQUERY", "sourceDatasets": [ "DATASET~5375BC53A82C65FD48653B9418094BB0" ], + "source_platform": "BIGQUERY", "url": "https://10ax.online.tableau.com/#/site/abc/workbooks/123" } } From 0bddcec47b316b2340f8d007c9815df69877e434 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 22:21:24 +0800 Subject: [PATCH 4/6] Add sourceInfo.mainUrl for thought spot --- metaphor/thought_spot/extractor.py | 5 +++- tests/thought_spot/expected.json | 43 ++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/metaphor/thought_spot/extractor.py b/metaphor/thought_spot/extractor.py index 25e6e04c..3bcaf260 100644 --- a/metaphor/thought_spot/extractor.py +++ b/metaphor/thought_spot/extractor.py @@ -202,6 +202,8 @@ def populate_virtual_views( ] field_mappings.append(field_mapping) + url = f"{self._base_url}/#/data/tables/{table_id}" + view = VirtualView( logical_id=VirtualViewLogicalID( name=table_id, type=VirtualViewType.THOUGHT_SPOT_DATA_OBJECT @@ -223,13 +225,14 @@ def populate_virtual_views( name=table_detail.header.name, description=table_detail.header.description, type=table_type, - url=f"{self._base_url}/#/data/tables/{table_id}", + url=url, is_verified=table.metadata_header.isVerified, ), entity_upstream=EntityUpstream( field_mappings=field_mappings if field_mappings else None ), system_tags=self._get_system_tags(table_detail.header.tags), + source_info=SourceInfo(main_url=url), ) self._virtual_views[table_id] = view diff --git a/tests/thought_spot/expected.json b/tests/thought_spot/expected.json index e46d7c4a..4de0ad61 100644 --- a/tests/thought_spot/expected.json +++ b/tests/thought_spot/expected.json @@ -7,6 +7,9 @@ "name": "worksheet1", "type": "THOUGHT_SPOT_DATA_OBJECT" }, + "sourceInfo": { + "mainUrl": "http://base.url/#/data/tables/worksheet1" + }, "structure": { "directories": [ "WORKSHEET" @@ -24,11 +27,11 @@ "thoughtSpot": { "columns": [], "description": "This is worksheet1", + "isVerified": false, "name": "Worksheet 1", "sourceVirtualViews": [], "type": "WORKSHEET", - "url": "http://base.url/#/data/tables/worksheet1", - "isVerified": false + "url": "http://base.url/#/data/tables/worksheet1" } }, { @@ -52,6 +55,9 @@ "name": "table1", "type": "THOUGHT_SPOT_DATA_OBJECT" }, + "sourceInfo": { + "mainUrl": "http://base.url/#/data/tables/table1" + }, "structure": { "directories": [ "TABLE" @@ -74,13 +80,13 @@ } ], "description": "This is table1", + "isVerified": false, "name": "Table 1", "sourceDatasets": [ "DATASET~29779E6A8F6548832D2305896A583002" ], "type": "TABLE", - "url": "http://base.url/#/data/tables/table1", - "isVerified": false + "url": "http://base.url/#/data/tables/table1" } }, { @@ -113,6 +119,9 @@ "name": "view1", "type": "THOUGHT_SPOT_DATA_OBJECT" }, + "sourceInfo": { + "mainUrl": "http://base.url/#/data/tables/view1" + }, "structure": { "directories": [ "VIEW" @@ -136,13 +145,13 @@ } ], "description": "This is view1", + "isVerified": false, "name": "View 1", "sourceVirtualViews": [ "VIRTUAL_VIEW~F13FAE9D17C5631FD2E1025CE8BC7F5C" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/view1", - "isVerified": false + "url": "http://base.url/#/data/tables/view1" } }, { @@ -168,6 +177,9 @@ "name": "sql_view_1", "type": "THOUGHT_SPOT_DATA_OBJECT" }, + "sourceInfo": { + "mainUrl": "http://base.url/#/data/tables/sql_view_1" + }, "structure": { "directories": [ "VIEW" @@ -184,13 +196,13 @@ "type": "VARCHAR" } ], + "isVerified": false, "name": "JOIN SQL view", "sourceDatasets": [ "DATASET~9A61719497E3AC013ACBCA83F9F732B7" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/sql_view_1", - "isVerified": false + "url": "http://base.url/#/data/tables/sql_view_1" } }, { @@ -216,6 +228,9 @@ "name": "sql_view_2", "type": "THOUGHT_SPOT_DATA_OBJECT" }, + "sourceInfo": { + "mainUrl": "http://base.url/#/data/tables/sql_view_2" + }, "structure": { "directories": [ "VIEW" @@ -232,13 +247,13 @@ "type": "VARCHAR" } ], + "isVerified": false, "name": "JOIN SQL view", "sourceDatasets": [ "DATASET~844586420073B959F7B75FED699C23E9" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/sql_view_2", - "isVerified": false + "url": "http://base.url/#/data/tables/sql_view_2" } }, { @@ -253,8 +268,8 @@ "dashboardType": "THOUGHT_SPOT_ANSWER", "description": "This is answer1", "thoughtSpot": { - "type": "ANSWER", - "isVerified": false + "isVerified": false, + "type": "ANSWER" }, "title": "Answer 1" }, @@ -325,8 +340,8 @@ "description": "This is board1", "thoughtSpot": { "embedUrl": "http://base.url/#/embed/viz/board1", - "type": "LIVEBOARD", - "isVerified": false + "isVerified": false, + "type": "LIVEBOARD" }, "title": "Board 1" }, From 3d01d41030a46e81e11fccfab42c447ad900f01a Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 22:52:39 +0800 Subject: [PATCH 5/6] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5706d81..d221840e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.147" +version = "0.14.148" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] From 0e9cec00fee19e7d72ec10623ab1a236699835c8 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 5 Nov 2024 23:05:40 +0800 Subject: [PATCH 6/6] Fix --- metaphor/tableau/extractor.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/metaphor/tableau/extractor.py b/metaphor/tableau/extractor.py index dd245dff..18754e7a 100644 --- a/metaphor/tableau/extractor.py +++ b/metaphor/tableau/extractor.py @@ -438,7 +438,10 @@ def _parse_workbook_query_response( server, published_source.owner.luid ) - url = f"{self._base_url}/datasources/{published_source.vizportalUrlId}" + datasource_url = ( + f"{self._base_url}/datasources/{published_source.vizportalUrlId}" + ) + self._virtual_views[published_source.luid] = VirtualView( logical_id=VirtualViewLogicalID( type=VirtualViewType.TABLEAU_DATASOURCE, name=published_source.luid @@ -459,7 +462,7 @@ def _parse_workbook_query_response( source_dataset_account=( custom_sql_source.account if custom_sql_source else None ), - url=url, + url=datasource_url, source_datasets=source_datasets or None, ), entity_upstream=( @@ -469,7 +472,7 @@ def _parse_workbook_query_response( ), system_tags=system_tags, system_contacts=system_contacts, - source_info=SourceInfo(main_url=url), + source_info=SourceInfo(main_url=datasource_url), ) source_virtual_views.append(virtual_view_id) published_datasources.append(published_source.name) @@ -494,7 +497,9 @@ def _parse_workbook_query_response( custom_sql_source.sources if custom_sql_source else None ) or self._parse_upstream_datasets(embedded_source.upstreamTables) - url = dashboard.source_info.main_url if dashboard.source_info else None + dashboard_url = ( + dashboard.source_info.main_url if dashboard.source_info else None + ) self._virtual_views[embedded_source.id] = VirtualView( logical_id=VirtualViewLogicalID( @@ -519,7 +524,7 @@ def _parse_workbook_query_response( custom_sql_source.account if custom_sql_source else None ), source_datasets=source_datasets or None, - url=url, + url=dashboard_url, ), entity_upstream=( EntityUpstream(source_entities=source_datasets) @@ -527,7 +532,7 @@ def _parse_workbook_query_response( else None ), system_tags=system_tags, - source_info=SourceInfo(main_url=url), + source_info=SourceInfo(main_url=dashboard_url), ) source_virtual_views.append(virtual_view_id)