From e89e9d7f6a28b1e738af00c474f66dff0dd92d85 Mon Sep 17 00:00:00 2001 From: Fabio Buso Date: Tue, 24 Sep 2024 14:38:33 +0200 Subject: [PATCH 1/2] [FSTORE-1548] BigQuery connector is using the wrong project when querying arrowflight --- python/hsfs/storage_connector.py | 3 +- .../fixtures/storage_connector_fixtures.json | 28 ++++++++++++++++++- python/tests/test_storage_connector.py | 21 ++++++++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/python/hsfs/storage_connector.py b/python/hsfs/storage_connector.py index cd5c551149..70f9a42804 100644 --- a/python/hsfs/storage_connector.py +++ b/python/hsfs/storage_connector.py @@ -1574,9 +1574,8 @@ def connector_options(self) -> Dict[str, Any]: """Return options to be passed to an external BigQuery connector library""" props = { "key_path": self._key_path, - "project_id": self._query_project, + "project_id": self._parent_project, "dataset_id": self._dataset, - "parent_project": self._parent_project, } return props diff --git a/python/tests/fixtures/storage_connector_fixtures.json b/python/tests/fixtures/storage_connector_fixtures.json index 82104f04a1..41a5d662d6 100644 --- a/python/tests/fixtures/storage_connector_fixtures.json +++ b/python/tests/fixtures/storage_connector_fixtures.json @@ -495,7 +495,7 @@ }, "headers": null }, - "get_big_query": { + "get_big_query_table": { "response": { "type": "featurestoreBigQueryConnectorDTO", "description": "BigQuery connector description", @@ -508,6 +508,32 @@ "dataset": "test_dataset", "query_table": "test_query_table", "query_project": "test_query_project", + "arguments": [{"name": "test_name", "value": "test_value"}] + }, + "method": "GET", + "path_params": [ + "project", + "119", + "featurestores", + 67, + "storageconnectors", + "test_big_query" + ], + "query_params": { + "temporaryCredentials": true + }, + "headers": null + }, + "get_big_query_query": { + "response": { + "type": "featurestoreBigQueryConnectorDTO", + "description": "BigQuery connector description", + "featurestoreId": 67, + "id": 1, + "name": "test_big_query", + "storageConnectorType": "BIGQUERY", + "key_path": "test_key_path", + "parent_project": "test_parent_project", "materialization_dataset": "test_materialization_dataset", "arguments": [{"name": "test_name", "value": "test_value"}] }, diff --git a/python/tests/test_storage_connector.py b/python/tests/test_storage_connector.py index d130019a34..ac2d8da351 100644 --- a/python/tests/test_storage_connector.py +++ b/python/tests/test_storage_connector.py @@ -800,7 +800,7 @@ def test_default_path(self, mocker): class TestBigQueryConnector: def test_from_response_json(self, backend_fixtures): # Arrange - json = backend_fixtures["storage_connector"]["get_big_query"]["response"] + json = backend_fixtures["storage_connector"]["get_big_query_table"]["response"] # Act sc = storage_connector.StorageConnector.from_response_json(json) @@ -815,7 +815,6 @@ def test_from_response_json(self, backend_fixtures): assert sc.dataset == "test_dataset" assert sc.query_table == "test_query_table" assert sc.query_project == "test_query_project" - assert sc.materialization_dataset == "test_materialization_dataset" assert sc.arguments == {"test_name": "test_value"} def test_from_response_json_basic_info(self, backend_fixtures): @@ -850,7 +849,7 @@ def test_credentials_base64_encoded(self, mocker, backend_fixtures, tmp_path): credentialsFile = tmp_path / "bigquery.json" credentialsFile.write_text(credentials) - json = backend_fixtures["storage_connector"]["get_big_query"]["response"] + json = backend_fixtures["storage_connector"]["get_big_query_table"]["response"] if isinstance(tmp_path, WindowsPath): json["key_path"] = "file:///" + str(credentialsFile.resolve()).replace( "\\", "/" @@ -891,9 +890,7 @@ def test_query_validation(self, mocker, backend_fixtures, tmp_path): credentials = '{"type": "service_account", "project_id": "test"}' credentialsFile = tmp_path / "bigquery.json" credentialsFile.write_text(credentials) - json = backend_fixtures["storage_connector"]["get_big_query"]["response"] - # remove property for query - json.pop("materialization_dataset") + json = backend_fixtures["storage_connector"]["get_big_query_table"]["response"] if isinstance(tmp_path, WindowsPath): json["key_path"] = "file:///" + str(credentialsFile.resolve()).replace( "\\", "/" @@ -905,3 +902,15 @@ def test_query_validation(self, mocker, backend_fixtures, tmp_path): # Assert with pytest.raises(ValueError): sc.read(query="select * from") + + def test_connector_options(self, backend_fixtures): + # Arrange + engine.set_instance("python", python.Engine()) + json = backend_fixtures["storage_connector"]["get_big_query_query"]["response"] + sc = storage_connector.StorageConnector.from_response_json(json) + + # Act + options = sc.connector_options() + + # Assert + assert options["project_id"] == "test_parent_project" From 7232487e86a38fe58493aeba098f8f5e369acd74 Mon Sep 17 00:00:00 2001 From: Fabio Buso Date: Tue, 24 Sep 2024 16:20:12 +0200 Subject: [PATCH 2/2] Release version 3.8.0-RC3 --- java/beam/pom.xml | 2 +- java/flink/pom.xml | 2 +- java/hsfs/pom.xml | 2 +- java/pom.xml | 2 +- java/spark/pom.xml | 2 +- python/hsfs/version.py | 2 +- utils/java/pom.xml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/java/beam/pom.xml b/java/beam/pom.xml index 199edb96fc..c825d8ec8d 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC2 + 3.8.0-RC3 4.0.0 diff --git a/java/flink/pom.xml b/java/flink/pom.xml index 3b94ab0487..6602e3401f 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC2 + 3.8.0-RC3 4.0.0 diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index a761fc095e..bb06c4c25e 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC2 + 3.8.0-RC3 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 57bc14b1f5..e4990f1c3b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - 3.8.0-RC2 + 3.8.0-RC3 hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index 834a58c749..fe86a03f9f 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC2 + 3.8.0-RC3 4.0.0 diff --git a/python/hsfs/version.py b/python/hsfs/version.py index 79b8fb46ac..9f4b8b0b54 100644 --- a/python/hsfs/version.py +++ b/python/hsfs/version.py @@ -14,4 +14,4 @@ # limitations under the License. # -__version__ = "3.8.0rc2" +__version__ = "3.8.0rc3" diff --git a/utils/java/pom.xml b/utils/java/pom.xml index 77b6a1d69d..d5e9055024 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - 3.8.0-RC2 + 3.8.0-RC3 3.2.0.0-SNAPSHOT