From bb69eb246f001b2f7c1438138d55075267efeb6f Mon Sep 17 00:00:00 2001 From: Andres Torres Date: Thu, 5 Dec 2024 08:25:16 -0600 Subject: [PATCH 01/19] HJ-97 - Missing QA from HJ-97 (#5559) Co-authored-by: Adam Sachs --- src/fides/api/api/v1/endpoints/system.py | 33 ++++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/fides/api/api/v1/endpoints/system.py b/src/fides/api/api/v1/endpoints/system.py index 0171265eda..266f23e1c5 100644 --- a/src/fides/api/api/v1/endpoints/system.py +++ b/src/fides/api/api/v1/endpoints/system.py @@ -19,7 +19,11 @@ from fides.api.api import deps from fides.api.api.v1.endpoints.saas_config_endpoints import instantiate_connection -from fides.api.db.crud import get_resource, get_resource_with_custom_fields +from fides.api.db.crud import ( + get_resource, + get_resource_with_custom_fields, + list_resource, +) from fides.api.db.ctl_session import get_async_db from fides.api.db.system import ( create_system, @@ -396,6 +400,20 @@ async def ls( # pylint: disable=invalid-name Otherwise all Systems will be returned (this may be a slow operation if there are many systems, so using the pagination parameters is recommended). """ + if not ( + size + or page + or search + or data_uses + or data_categories + or data_subjects + or dnd_relevant + or show_hidden + ): + # if no advanced parameters are passed, we return a very basic list of all System resources + # to maintain backward compatibility of the original API, which backs some important client usages, e.g. 
the fides CLI + + return await list_resource(System, db) query = select(System) @@ -447,19 +465,6 @@ async def ls( # pylint: disable=invalid-name # Add a distinct so we only get one row per system duplicates_removed = filtered_query.distinct(System.id) - if not ( - size - or page - or search - or data_uses - or data_categories - or data_subjects - or dnd_relevant - or show_hidden - ): - result = await db.execute(duplicates_removed) - return result.scalars().all() - return await async_paginate(db, duplicates_removed, pagination_params) From 141510802f41c3427fbb595f97bcb64d3adf83eb Mon Sep 17 00:00:00 2001 From: Catherine Smith Date: Thu, 5 Dec 2024 16:01:52 +0100 Subject: [PATCH 02/19] LA-106: Integrate erasure request for BigQuery Enterprise DSR Testing Initiative (#5554) --- .github/workflows/backend_checks.yml | 4 +- CHANGELOG.md | 1 + .../bigquery_enterprise_test_dataset.yml | 25 +-- tests/fixtures/application_fixtures.py | 51 ++++++ tests/fixtures/bigquery_fixtures.py | 97 ++++++++++++ ...est_bigquery_enterprise_privacy_request.py | 145 +++++++++++++++++- .../test_request_runner_service.py | 2 +- 7 files changed, 302 insertions(+), 23 deletions(-) diff --git a/.github/workflows/backend_checks.yml b/.github/workflows/backend_checks.yml index a9236ea23b..e3c7a7c5e2 100644 --- a/.github/workflows/backend_checks.yml +++ b/.github/workflows/backend_checks.yml @@ -397,8 +397,8 @@ jobs: # Secrets to pull from 1Password BIGQUERY_DATASET: op://github-actions/bigquery/BIGQUERY_DATASET BIGQUERY_KEYFILE_CREDS: op://github-actions/bigquery/BIGQUERY_KEYFILE_CREDS - BIGQUERY_ENTERPRISE_DATASET: op://github-actions/bigquery-enterprise/BIGQUERY_DATASET - BIGQUERY_ENTERPRISE_KEYFILE_CREDS: op://github-actions/bigquery-enterprise/BIGQUERY_KEYFILE_CREDS + BIGQUERY_ENTERPRISE_DATASET: op://github-actions/bigquery-enterprise/BIGQUERY_ENTERPRISE_DATASET + BIGQUERY_ENTERPRISE_KEYFILE_CREDS: op://github-actions/bigquery-enterprise/BIGQUERY_ENTERPRISE_KEYFILE_CREDS 
DYNAMODB_ACCESS_KEY_ID: op://github-actions/dynamodb/DYNAMODB_ACCESS_KEY_ID DYNAMODB_ACCESS_KEY: op://github-actions/dynamodb/DYNAMODB_ACCESS_KEY DYNAMODB_ASSUME_ROLE_ARN: op://github-actions/dynamodb/DYNAMODB_ASSUME_ROLE_ARN diff --git a/CHANGELOG.md b/CHANGELOG.md index a962f4b20e..9a29e3703b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ The types of changes are: - Added `fides_consent_override` option in FidesJS SDK [#5541](https://github.com/ethyca/fides/pull/5541) - Added new `script` ConsentMethod in FidesJS SDK for tracking automated consent [#5541](https://github.com/ethyca/fides/pull/5541) - Added a new page under system integrations to run standalone dataset tests (Fidesplus) [#5549](https://github.com/ethyca/fides/pull/5549) +- Added new erasure tests for BigQuery Enterprise [#5554](https://github.com/ethyca/fides/pull/5554) ### Changed - Adding hashes to system tab URLs [#5535](https://github.com/ethyca/fides/pull/5535) diff --git a/data/dataset/bigquery_enterprise_test_dataset.yml b/data/dataset/bigquery_enterprise_test_dataset.yml index 10504d63a5..59d27e68a2 100644 --- a/data/dataset/bigquery_enterprise_test_dataset.yml +++ b/data/dataset/bigquery_enterprise_test_dataset.yml @@ -31,7 +31,7 @@ dataset: references: null identity: null primary_key: true - data_type: null + data_type: integer length: null return_all_elements: null read_only: null @@ -103,7 +103,7 @@ dataset: references: null identity: null primary_key: true - data_type: null + data_type: integer length: null return_all_elements: null read_only: null @@ -119,18 +119,7 @@ dataset: description: null data_categories: - system.operations - fides_meta: - references: - - dataset: enterprise_dsr_testing - field: stackoverflow_posts.id - direction: from - identity: null - primary_key: null - data_type: null - length: null - return_all_elements: null - read_only: null - custom_request_field: null + fides_meta: null fields: null - name: revision_guid description: null @@ -147,7 
+136,7 @@ dataset: - name: user_id description: null data_categories: - - user.contact + - system.operations fides_meta: references: - dataset: enterprise_dsr_testing @@ -216,7 +205,7 @@ dataset: references: null identity: null primary_key: true - data_type: null + data_type: integer length: null return_all_elements: null read_only: null @@ -260,7 +249,7 @@ dataset: - name: owner_display_name description: null data_categories: - - system.operations + - user.contact fides_meta: null fields: null - name: owner_user_id @@ -274,7 +263,7 @@ dataset: direction: from identity: null primary_key: null - data_type: null + data_type: integer length: null return_all_elements: null read_only: null diff --git a/tests/fixtures/application_fixtures.py b/tests/fixtures/application_fixtures.py index 04a3472696..eb28b35657 100644 --- a/tests/fixtures/application_fixtures.py +++ b/tests/fixtures/application_fixtures.py @@ -939,6 +939,57 @@ def biquery_erasure_policy( pass +@pytest.fixture(scope="function") +def bigquery_enterprise_erasure_policy( + db: Session, + oauth_client: ClientDetail, +) -> Generator: + erasure_policy = Policy.create( + db=db, + data={ + "name": "example enterprise erasure policy", + "key": "example_enterprise_erasure_policy", + "client_id": oauth_client.id, + }, + ) + + erasure_rule = Rule.create( + db=db, + data={ + "action_type": ActionType.erasure.value, + "client_id": oauth_client.id, + "name": "Erasure Rule Enterprise", + "policy_id": erasure_policy.id, + "masking_strategy": { + "strategy": "null_rewrite", + "configuration": {}, + }, + }, + ) + + user_target = RuleTarget.create( + db=db, + data={ + "client_id": oauth_client.id, + "data_category": DataCategory("user.contact").value, + "rule_id": erasure_rule.id, + }, + ) + yield erasure_policy + try: + user_target.delete(db) + except ObjectDeletedError: + pass + try: + erasure_rule.delete(db) + except ObjectDeletedError: + pass + try: + erasure_policy.delete(db) + except ObjectDeletedError: + pass + + 
@pytest.fixture(scope="function") def erasure_policy_aes( db: Session, diff --git a/tests/fixtures/bigquery_fixtures.py b/tests/fixtures/bigquery_fixtures.py index 9c7ef2f2bc..105910e466 100644 --- a/tests/fixtures/bigquery_fixtures.py +++ b/tests/fixtures/bigquery_fixtures.py @@ -1,5 +1,7 @@ import ast import os +import random +from datetime import datetime from typing import Dict, Generator, List from uuid import uuid4 @@ -449,6 +451,101 @@ def bigquery_resources_with_namespace_meta( connection.execute(stmt) +@pytest.fixture(scope="function") +def bigquery_enterprise_resources( + bigquery_enterprise_test_dataset_config, +): + bigquery_connection_config = ( + bigquery_enterprise_test_dataset_config.connection_config + ) + connector = BigQueryConnector(bigquery_connection_config) + bigquery_client = connector.client() + with bigquery_client.connect() as connection: + + # Real max id in the Stackoverflow dataset is 20081052, so we purposefully generate an id above this max + stmt = "select max(id) from enterprise_dsr_testing.users;" + res = connection.execute(stmt) + # Increment the id by a random number to avoid conflicts on concurrent test runs + random_increment = random.randint(0, 99999) + user_id = res.all()[0][0] + random_increment + display_name = ( + f"fides_testing_{user_id}" # prefix to do manual cleanup if needed + ) + last_access_date = datetime.now() + creation_date = datetime.now() + location = "Dream World" + + # Create test user data + stmt = f""" + insert into enterprise_dsr_testing.users (id, display_name, last_access_date, creation_date, location) + values ({user_id}, '{display_name}', '{last_access_date}', '{creation_date}', '{location}'); + """ + connection.execute(stmt) + + # Create test stackoverflow_posts data. Posts are responses to questions on Stackoverflow, and do not include the original question. + post_body = "For me, the solution was to adopt 3 cats and dance with them under the full moon at midnight." 
+ stmt = "select max(id) from enterprise_dsr_testing.stackoverflow_posts;" + res = connection.execute(stmt) + random_increment = random.randint(0, 99999) + post_id = res.all()[0][0] + random_increment + stmt = f""" + insert into enterprise_dsr_testing.stackoverflow_posts (body, creation_date, id, owner_user_id, owner_display_name) + values ('{post_body}', '{creation_date}', {post_id}, {user_id}, '{display_name}'); + """ + connection.execute(stmt) + + # Create test comments data. Comments are responses to posts or questions on Stackoverflow, and do not include the original question or post itself. + stmt = "select max(id) from enterprise_dsr_testing.comments;" + res = connection.execute(stmt) + random_increment = random.randint(0, 99999) + comment_id = res.all()[0][0] + random_increment + comment_text = "FYI this only works if you have pytest installed locally." + stmt = f""" + insert into enterprise_dsr_testing.comments (id, text, creation_date, post_id, user_id, user_display_name) + values ({comment_id}, '{comment_text}', '{creation_date}', {post_id}, {user_id}, '{display_name}'); + """ + connection.execute(stmt) + + # Create test post_history data + stmt = "select max(id) from enterprise_dsr_testing.post_history;" + res = connection.execute(stmt) + random_increment = random.randint(0, 99999) + post_history_id = res.all()[0][0] + random_increment + revision_text = "this works if you have pytest" + uuid = str(uuid4()) + stmt = f""" + insert into enterprise_dsr_testing.post_history (id, text, creation_date, post_id, user_id, post_history_type_id, revision_guid) + values ({post_history_id}, '{revision_text}', '{creation_date}', {post_id}, {user_id}, 1, '{uuid}'); + """ + connection.execute(stmt) + + yield { + "name": display_name, + "user_id": user_id, + "comment_id": comment_id, + "post_history_id": post_history_id, + "post_id": post_id, + "client": bigquery_client, + "connector": connector, + "first_comment_text": comment_text, + "first_post_body": post_body, 
"revision_text": revision_text, + "display_name": display_name, + } + # Remove test data and close BigQuery connection in teardown + stmt = f"delete from enterprise_dsr_testing.post_history where id = {post_history_id};" + connection.execute(stmt) + + stmt = f"delete from enterprise_dsr_testing.comments where id = {comment_id};" + connection.execute(stmt) + + stmt = f"delete from enterprise_dsr_testing.stackoverflow_posts where id = {post_id};" + connection.execute(stmt) + + stmt = f"delete from enterprise_dsr_testing.users where id = {user_id};" + connection.execute(stmt) + + @pytest.fixture(scope="session") def bigquery_test_engine(bigquery_keyfile_creds) -> Generator: """Return a connection to a Google BigQuery Warehouse""" diff --git a/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py b/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py index 05fc8742a3..8fb7e29729 100644 --- a/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py +++ b/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py @@ -28,7 +28,7 @@ PRIVACY_REQUEST_TASK_TIMEOUT = 5 # External services take much longer to return -PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL = 60 +PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL = 100 @pytest.mark.integration_bigquery @@ -101,7 +101,7 @@ def test_create_and_process_access_request_bigquery_enterprise( len( [post["user_id"] for post in results["enterprise_dsr_testing:post_history"]] ) - == 60 + == 39 ) assert ( len( @@ -139,3 +139,144 @@ def test_create_and_process_access_request_bigquery_enterprise( pr.delete(db=db) assert not pr in db # Check that `pr` has been expunged from the session assert ExecutionLog.get(db, object_id=log_id).privacy_request_id == pr_id + + +@pytest.mark.integration_external +@pytest.mark.integration_bigquery +@pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], +) +@pytest.mark.parametrize( + "bigquery_fixtures", + [ + 
"bigquery_enterprise_resources" + ], # todo- add other resources to test, e.g. partitioned data +) +def test_create_and_process_erasure_request_bigquery( + db, + request, + policy, + cache, + dsr_version, + bigquery_fixtures, + bigquery_enterprise_test_dataset_config, + bigquery_enterprise_erasure_policy, + run_privacy_request_task, +): + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + bigquery_enterprise_resources = request.getfixturevalue(bigquery_fixtures) + bigquery_client = bigquery_enterprise_resources["client"] + + # first test access request against manually added data + user_id = bigquery_enterprise_resources["user_id"] + customer_email = "customer-1@example.com" + data = { + "requested_at": "2024-08-30T16:09:37.359Z", + "policy_key": policy.key, + "identity": { + "email": customer_email, + "stackoverflow_user_id": { + "label": "Stackoverflow User Id", + "value": user_id, + }, + }, + } + + pr = get_privacy_request_results( + db, + policy, + run_privacy_request_task, + data, + PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + + results = pr.get_raw_access_results() + assert len(results.keys()) == 4 + + for key in results.keys(): + assert results[key] is not None + assert results[key] != {} + + users = results["enterprise_dsr_testing:users"] + assert len(users) == 1 + user_details = users[0] + assert user_details["id"] == user_id + + assert ( + len( + [ + comment["user_id"] + for comment in results["enterprise_dsr_testing:comments"] + ] + ) + == 1 + ) + assert ( + len( + [post["user_id"] for post in results["enterprise_dsr_testing:post_history"]] + ) + == 1 + ) + assert ( + len( + [ + post["title"] + for post in results["enterprise_dsr_testing:stackoverflow_posts"] + ] + ) + == 1 + ) + + data = { + "requested_at": "2024-08-30T16:09:37.359Z", + "policy_key": bigquery_enterprise_erasure_policy.key, + "identity": { + "email": customer_email, + "stackoverflow_user_id": { + "label": "Stackoverflow User Id", + "value": 
bigquery_enterprise_resources["user_id"], + }, + }, + } + + # Should erase all user data + pr = get_privacy_request_results( + db, + bigquery_enterprise_erasure_policy, + run_privacy_request_task, + data, + task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + pr.delete(db=db) + + bigquery_client = bigquery_enterprise_resources["client"] + post_history_id = bigquery_enterprise_resources["post_history_id"] + comment_id = bigquery_enterprise_resources["comment_id"] + post_id = bigquery_enterprise_resources["post_id"] + with bigquery_client.connect() as connection: + stmt = f"select text from enterprise_dsr_testing.post_history where id = {post_history_id};" + res = connection.execute(stmt).all() + for row in res: + assert row.text is None + + stmt = f"select user_display_name, text from enterprise_dsr_testing.comments where id = {comment_id};" + res = connection.execute(stmt).all() + for row in res: + assert row.user_display_name is None + assert row.text is None + + stmt = f"select owner_user_id, owner_display_name, body from enterprise_dsr_testing.stackoverflow_posts where id = {post_id};" + res = connection.execute(stmt).all() + for row in res: + assert ( + row.owner_user_id == bigquery_enterprise_resources["user_id"] + ) # not targeted by policy + assert row.owner_display_name is None + assert row.body is None + + stmt = f"select display_name, location from enterprise_dsr_testing.users where id = {user_id};" + res = connection.execute(stmt).all() + for row in res: + assert row.display_name is None + assert row.location is None diff --git a/tests/ops/service/privacy_request/test_request_runner_service.py b/tests/ops/service/privacy_request/test_request_runner_service.py index e644f06814..28f4078cf0 100644 --- a/tests/ops/service/privacy_request/test_request_runner_service.py +++ b/tests/ops/service/privacy_request/test_request_runner_service.py @@ -62,7 +62,7 @@ PRIVACY_REQUEST_TASK_TIMEOUT = 5 # External services take much longer to return 
-PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL = 60 +PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL = 100 @pytest.fixture(scope="function") From 411c89559e0851f33369398d662804fec8b0c276 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Thu, 5 Dec 2024 09:43:01 -0800 Subject: [PATCH 03/19] Disable test datasets button for non-database integrations (#5560) --- .../forms/ConnectorParametersForm.tsx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/clients/admin-ui/src/features/datastore-connections/system_portal_config/forms/ConnectorParametersForm.tsx b/clients/admin-ui/src/features/datastore-connections/system_portal_config/forms/ConnectorParametersForm.tsx index dae5bce4b1..92c881bd41 100644 --- a/clients/admin-ui/src/features/datastore-connections/system_portal_config/forms/ConnectorParametersForm.tsx +++ b/clients/admin-ui/src/features/datastore-connections/system_portal_config/forms/ConnectorParametersForm.tsx @@ -522,11 +522,13 @@ export const ConnectorParametersForm = ({ {testButtonLabel} ) : null} - {isPlusEnabled && !_.isEmpty(initialDatasets) && ( - - )} + {isPlusEnabled && + SystemType.DATABASE === connectionOption.type && + !_.isEmpty(initialDatasets) && ( + + )} {connectionOption.authorization_required && !authorized ? (